# Example 1
def basic_pipelined_training_step(model,
                                  opts,
                                  learning_rate,
                                  infeed,
                                  outfeed,
                                  iterations_per_step=1):
    """Construct a pipelined training step.

    The model is split into pipeline stages; every stage except the last
    threads the learning rate and label through alongside the activations,
    and the final stage computes the loss and metrics.
    """
    def first_stage(learning_rate, image, label, pipeline_stage=None):
        # Forward the learning rate and label untouched; transform the image.
        return learning_rate, pipeline_stage(image), label,

    def final_stage(learning_rate, x, label, pipeline_stage=None):
        x = pipeline_stage(x)
        loss, cross_entropy, accuracy = calculate_loss(x, label, opts)
        return loss, cross_entropy, accuracy, learning_rate / opts["lr_scale"]

    stages = model(opts)
    stage_fns = [partial(first_stage, pipeline_stage=s) for s in stages[:-1]]
    stage_fns.append(partial(final_stage, pipeline_stage=stages[-1]))

    def optimizer_function(loss, _, __, lr):
        # Re-apply loss scaling so gradients are computed on the scaled loss.
        optimizer = get_optimizer(opts)(lr)
        return pipelining_ops.OptimizerFunctionOutput(
            optimizer, loss * opts["loss_scaling"])

    amps = opts['available_memory_proportion']
    options = None
    if amps and len(amps) > 1:
        # Map values to the different pipeline stages: each consecutive pair
        # provides the forward and backward proportion for one stage.
        options = [
            pipelining_ops.PipelineStageOptions(
                {"availableMemoryProportion": amps[2 * i]},
                {"availableMemoryProportion": amps[2 * i + 1]})
            for i in range(len(amps) // 2)
        ]

    # Resolve the schedule enum member whose short name matches the option.
    schedule = next(s for s in list(pipelining_ops.PipelineSchedule)
                    if opts["pipeline_schedule"] == str(s).split(".")[-1])

    return pipelining_ops.pipeline(
        computational_stages=stage_fns,
        pipeline_depth=int(opts['pipeline_depth']),
        repeat_count=iterations_per_step,
        inputs=[learning_rate],
        infeed_queue=infeed,
        outfeed_queue=outfeed,
        optimizer_function=optimizer_function,
        forward_propagation_stages_poplar_options=options,
        backward_propagation_stages_poplar_options=options,
        pipeline_schedule=schedule,
        offload_weight_update_variables=not opts['disable_variable_offloading'],
        name="Pipeline")
# Example 2
        def infer(lr, infeed, outfeed, gradient_accumulation_count):
            # Inference-only pipeline: no optimizer function is attached.
            return pipelining_ops.pipeline(
                self.computational_stages,
                gradient_accumulation_count=gradient_accumulation_count,
                gradient_accumulation_dtype=self.dtype,
                inputs=[lr],
                infeed_queue=infeed,
                outfeed_queue=outfeed,
                device_mapping=self.device_mapping)
# Example 3
 def model(lr):
     # Two computational stages, both mapped to IPU 0, repeated twice per run.
     return pipelining_ops.pipeline(
         computational_stages=[stage1, stage2],
         gradient_accumulation_count=gradient_accumulation_count,
         repeat_count=2,
         inputs=[lr],
         infeed_queue=infeed_queue,
         outfeed_queue=outfeed_queue,
         device_mapping=[0, 0],
         optimizer_function=optimizer_function,
         name="Pipeline")
# Example 4
 def prediction_pipeline():
     # Build the model in PREDICT mode and wire its stages into a pipeline.
     spec = self._call_model_fn(model_fn_lib.ModeKeys.PREDICT)
     pipeline_op = pipelining_ops.pipeline(
         computational_stages=spec.computational_stages,
         gradient_accumulation_count=spec.gradient_accumulation_count,
         repeat_count=self._config.ipu_run_config.iterations_per_loop,
         inputs=spec.inputs,
         infeed_queue=self._infeed_queue,
         outfeed_queue=self._outfeed_queue,
         device_mapping=spec.device_mapping,
         pipeline_schedule=spec.pipeline_schedule,
         name="ipu_pipeline_estimator_predict")
     return pipeline_op
# Example 5
        def train(lr, infeed, outfeed, gradient_accumulation_count):
            # Training pipeline: attach the optimizer and keep the weight
            # update variables on-device (no offloading).
            return pipelining_ops.pipeline(
                self.computational_stages,
                gradient_accumulation_count=gradient_accumulation_count,
                gradient_accumulation_dtype=self.dtype,
                inputs=[lr],
                infeed_queue=infeed,
                outfeed_queue=outfeed,
                device_mapping=self.device_mapping,
                optimizer_function=self.optimizer_function,
                offload_weight_update_variables=False)
# Example 6
 def training_pipeline():
     # Build the model in TRAIN mode, run its stages as a pipeline and
     # outfeed the loss.
     spec = self._call_model_fn(model_fn_lib.ModeKeys.TRAIN)
     pipeline_op = pipelining_ops.pipeline(
         computational_stages=spec.computational_stages,
         gradient_accumulation_count=spec.gradient_accumulation_count,
         repeat_count=self._config.ipu_run_config.iterations_per_loop,
         inputs=spec.inputs,
         infeed_queue=self._infeed_queue,
         outfeed_queue=self._outfeed_queue,
         optimizer_function=spec.optimizer_function,
         device_mapping=spec.device_mapping,
         pipeline_schedule=spec.pipeline_schedule,
         outfeed_loss=True,
         offload_weight_update_variables=(
             spec.offload_weight_update_variables),
         name="ipu_pipeline_estimator_train")
     return pipeline_op
# Example 7
        def evaluation_pipeline():
            # Build the model in EVAL mode, capturing its metrics function
            # exactly once for later use.
            spec = self._call_model_fn(model_fn_lib.ModeKeys.EVAL)

            assert not self._captured_eval_metrics_fn
            assert spec.eval_metrics_fn
            self._captured_eval_metrics_fn = spec.eval_metrics_fn

            pipeline_op = pipelining_ops.pipeline(
                computational_stages=spec.computational_stages,
                gradient_accumulation_count=spec.gradient_accumulation_count,
                repeat_count=self._config.ipu_run_config.iterations_per_loop,
                inputs=spec.inputs,
                infeed_queue=self._infeed_queue,
                outfeed_queue=self._outfeed_queue,
                device_mapping=spec.device_mapping,
                pipeline_schedule=spec.pipeline_schedule,
                name="ipu_pipeline_estimator_eval")
            return pipeline_op
# Example 8
def basic_pipelined_training_step(model,
                                  opts,
                                  learning_rate,
                                  iterations_per_step=1):
    """Pipelined training step using an explicit optimizer stage.

    The learning rate fed into the pipeline is pre-divided by the loss
    scale; the optimizer stage multiplies it back out when reporting.
    """
    def first_stage(learning_rate, image, label, pipeline_stage=None):
        # Thread the learning rate and label through unchanged.
        return learning_rate, pipeline_stage(image), label,

    def final_stage(learning_rate, x, label, pipeline_stage=None):
        x = pipeline_stage(x)
        loss, cross_entropy, accuracy = calculate_loss(x, label, opts)
        return learning_rate, loss, cross_entropy, accuracy

    stages = model(opts)
    stage_fns = [partial(first_stage, pipeline_stage=s) for s in stages[:-1]]
    stage_fns.append(partial(final_stage, pipeline_stage=stages[-1]))

    def optimizer_stage(lr, loss, cross_entropy, accuracy):
        grads_and_vars = calculate_gradients(
            loss, opts["weight_decay"] * opts['loss_scaling'], opts)
        optimizer = get_optimizer(opts)(lr)
        apply_grads = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

        # Undo the loss scaling for reporting; emit both the rescaled and the
        # raw learning rate plus the apply-gradients op.
        return loss / opts["loss_scaling"], cross_entropy, accuracy, lr * opts[
            'loss_scaling'], lr, apply_grads

    # Resolve the schedule enum member whose short name matches the option.
    schedule = next(s for s in list(pipelining_ops.PipelineSchedule)
                    if opts["pipeline_schedule"] == str(s).split(".")[-1])

    return pipelining_ops.pipeline(
        computational_stages=stage_fns,
        pipeline_depth=int(opts['pipeline_depth']),
        repeat_count=iterations_per_step,
        inputs=[learning_rate / opts['loss_scaling']],
        infeed_queue=infeed,
        outfeed_queue=outfeed,
        optimizer_stage=optimizer_stage,
        pipeline_schedule=schedule,
        name="Pipeline")
# Example 9
def basic_pipelined_training_step(model,
                                  opts,
                                  learning_rate,
                                  infeed,
                                  outfeed,
                                  iterations_per_step=1):
    """Build a pipelined training step driven by an optimizer function."""
    def first_stage(learning_rate, image, label, pipeline_stage=None):
        # Forward the learning rate and label untouched; transform the image.
        return learning_rate, pipeline_stage(image), label,

    def final_stage(learning_rate, x, label, pipeline_stage=None):
        x = pipeline_stage(x)
        loss, cross_entropy, accuracy = calculate_loss(x, label, opts)
        # Hand a loss-scale-adjusted learning rate on to the optimizer.
        return loss, cross_entropy, accuracy, learning_rate / opts[
            "loss_scaling"]

    stages = model(opts)
    stage_fns = [partial(first_stage, pipeline_stage=s) for s in stages[:-1]]
    stage_fns.append(partial(final_stage, pipeline_stage=stages[-1]))

    def optimizer_function(loss, _, __, lr):
        # Re-apply loss scaling so gradients are computed on the scaled loss.
        optimizer = get_optimizer(opts)(lr)
        return pipelining_ops.OptimizerFunctionOutput(
            optimizer, loss * opts["loss_scaling"])

    # Resolve the schedule enum member whose short name matches the option.
    schedule = next(s for s in list(pipelining_ops.PipelineSchedule)
                    if opts["pipeline_schedule"] == str(s).split(".")[-1])

    return pipelining_ops.pipeline(
        computational_stages=stage_fns,
        pipeline_depth=int(opts['pipeline_depth']),
        repeat_count=iterations_per_step,
        inputs=[learning_rate],
        infeed_queue=infeed,
        outfeed_queue=outfeed,
        optimizer_function=optimizer_function,
        pipeline_schedule=schedule,
        name="Pipeline")
# Example 10
    def _internal_run_loop(self, infeed_queue, outfeed_queue, repeat_count,
                           mode):
        """Build the pipelined run loop for a graph-network style model.

        Args:
            infeed_queue: infeed queue supplying model inputs (and targets).
            outfeed_queue: outfeed queue receiving the pipeline outputs.
            repeat_count: number of pipeline repetitions per execution.
            mode: a `ModeKeys` value; TRAIN attaches the optimizer function.

        Returns:
            The outputs of the constructed `pipelining_ops.pipeline` op.
        """
        training = mode == ModeKeys.TRAIN

        # Dictionary mapping reference tensors to computed tensors.
        tensor_dict = OrderedDict()

        def get_inputs_and_targets(*args):
            # Split the flattened infeed arguments into model inputs and
            # remaining targets, then run the inputs through the input layers.
            args = nest.flatten(args)
            num_inputs = len(self.inputs)
            inputs = list(args[:num_inputs])
            targets = list(args[num_inputs:])
            assert len(inputs) == num_inputs

            # "Execute" the input layers
            executed_inputs = []
            for op, layer, tensor in zip(self.inputs, self._input_layers,
                                         inputs):
                executed_inputs.append(layer(tensor))
                # Key tensor_dict by the id of the reference tensor so later
                # stages can look up the computed value for it.
                tensor_dict[str(id(op))] = executed_inputs[-1]
                if isinstance(op, ops.Tensor) and isinstance(
                        tensor, ops.Tensor):
                    try:
                        tensor.set_shape(tensor.shape.merge_with(op.shape))
                    except ValueError:
                        logging.warning(
                            'Model was constructed with shape {} for input {}, but it '
                            'was re-called on a Tensor with incompatible '
                            'shape {}.'.format(op, op.shape, tensor.shape))
            return executed_inputs, targets

        def main_body(stage_id, *args):
            # The first stage consumes the infeed; later stages receive the
            # running tensor_dict values followed by the targets.
            if stage_id == self.stages[0]:
                inputs, targets = get_inputs_and_targets(*args)
            else:
                inputs = list(args[:len(tensor_dict)])
                targets = list(args[len(inputs):])

            # Update the tensor dict with the inputs.
            for idx, k in enumerate(tensor_dict):
                tensor_dict[k] = inputs[idx]

            # Execute only the layer nodes assigned to this pipeline stage.
            for i in self._stage_node_ids[stage_id]:
                node = self._post_order_node_execution[len(self.inputs) + i]
                if node._pipeline_stage == stage_id:  # pylint: disable=protected-access
                    self._execute_layer_node(node, training, tensor_dict)  # pylint: disable=protected-access

            if stage_id == self.stages[-1]:
                return self._get_output_tensors(tensor_dict)  # pylint: disable=protected-access
            # Thread all intermediate tensors plus the targets to the next stage.
            return list(tensor_dict.values()) + targets

        def inference_body(stage_id, *args):
            # Inference just runs the forward pass; no loss is attached.
            return main_body(stage_id, *args)

        def training_body(stage_id, *args):
            x = main_body(stage_id, *args)
            if stage_id == self.stages[-1]:
                self._set_output_attrs(x)
                # NOTE(review): this indexes a single element of args; a slice
                # `args[-len(self.outputs):]` may be intended — confirm.
                targets = args[-len(self.outputs)]
                return self._add_loss(targets)
            return x

        def optimizer_function(loss, *_):
            # Produce the optimizer configuration used by the pipeline op.
            if not self.trainable_weights:
                raise ValueError(
                    "Model must have at least one trainable parameter.")

            opt = self._get_optimizer()
            return pipelining_ops.OptimizerFunctionOutput(opt, loss)

        # The pipeline stages, a set of feed forward functions.
        if mode == ModeKeys.PREDICT:
            stage_fn = inference_body
        else:
            stage_fn = training_body

        stages = []
        for stage in self.stages:
            stages.append(partial(stage_fn, stage))

        # Only attach an optimizer when actually training.
        opt = optimizer_function if training else None

        pipeline = pipelining_ops.pipeline(
            stages,
            gradient_accumulation_count=self.gradient_accumulation_count,
            repeat_count=repeat_count,
            inputs=[],
            infeed_queue=infeed_queue,
            outfeed_queue=outfeed_queue,
            optimizer_function=opt,
            device_mapping=self.device_mapping,
            pipeline_schedule=self.pipeline_schedule,
            forward_propagation_stages_poplar_options=self.
            forward_propagation_stages_poplar_options,
            backward_propagation_stages_poplar_options=self.
            backward_propagation_stages_poplar_options,
            weight_update_poplar_options=self.weight_update_poplar_options,
            replicated_optimizer_state_sharding=self.
            replicated_optimizer_state_sharding,
            offload_activations=self.offload_activations,
            offload_gradient_accumulation_buffers=self.
            offload_gradient_accumulation_buffers,
            replicated_weight_sharding=self.replicated_weight_sharding,
            offload_weights=self.offload_weights,
            name=self.name,
            **self.args)

        return pipeline.outputs
# Example 11
    def _internal_run_loop(self, infeed_queue, outfeed_queue, repeat_count,
                           mode):
        """Build the pipelined run loop for training, evaluation or inference."""
        training = mode == ModeKeys.TRAIN

        def call_inference_stage(stage_id, inputs):
            """Run the layers of one stage and return its output."""
            # Record the inputs the first time the first stage is built.
            if stage_id == 0 and not self.inputs:
                self._set_input_attrs(inputs)

            outputs = inputs
            for layer in self.stages[stage_id]:
                call_kwargs = {}
                # Only pass `training` to layers whose call() accepts it.
                if 'training' in tf_inspect.getfullargspec(layer.call).args:
                    call_kwargs['training'] = training
                outputs = layer(outputs, **call_kwargs)

            return outputs

        def call_training_stage(stage_id, inputs, targets):
            outputs = call_inference_stage(stage_id, inputs)

            # On the last stage, recompile the model now that inputs and
            # outputs are known, then attach the losses and metrics.
            if stage_id == len(self.stages) - 1:
                self._set_output_attrs(outputs)
                return self._add_loss(targets)

            return outputs, targets

        def optimizer_function(loss, *_):
            """Produce the optimizer configuration for the pipeline op."""
            if not self.trainable_weights:
                raise ValueError(
                    "Model must have at least one trainable parameter.")

            return pipelining_ops.OptimizerFunctionOutput(
                self._get_optimizer(), loss)

        # Choose the per-stage feed-forward function for this mode.
        stage_fn = (call_inference_stage
                    if mode == ModeKeys.PREDICT else call_training_stage)
        stage_list = [
            partial(stage_fn, stage_id)
            for stage_id in range(len(self.stages))
        ]

        pipeline = pipelining_ops.pipeline(
            stage_list,
            gradient_accumulation_count=self.gradient_accumulation_count,
            repeat_count=repeat_count,
            inputs=[],
            infeed_queue=infeed_queue,
            outfeed_queue=outfeed_queue,
            optimizer_function=optimizer_function if training else None,
            device_mapping=self.device_mapping,
            pipeline_schedule=self.pipeline_schedule,
            forward_propagation_stages_poplar_options=(
                self.forward_propagation_stages_poplar_options),
            backward_propagation_stages_poplar_options=(
                self.backward_propagation_stages_poplar_options),
            weight_update_poplar_options=self.weight_update_poplar_options,
            replicated_optimizer_state_sharding=(
                self.replicated_optimizer_state_sharding),
            offload_activations=self.offload_activations,
            offload_gradient_accumulation_buffers=(
                self.offload_gradient_accumulation_buffers),
            replicated_weight_sharding=self.replicated_weight_sharding,
            offload_weights=self.offload_weights,
            name=self.name,
            **self.args)

        return pipeline.outputs
# Example 12
def build_network(infeed,
                  outfeed,
                  iterations_per_step=1,
                  bert_config=None,
                  opts=None,
                  learning_rate=None,
                  is_training=True):
    """Build the pipelined BERT network for SQuAD.

    Args:
        infeed: infeed queue supplying the batched inputs.
        outfeed: outfeed queue receiving the pipeline outputs.
        iterations_per_step: pipeline repeat count per session run.
        bert_config: BERT model configuration object.
        opts: option dictionary (device mapping, memory proportions, ...).
        learning_rate: learning-rate tensor; only used when training.
        is_training: when True, attach the optimizer to the pipeline.

    Returns:
        The `pipelining_ops.pipeline` op for training or inference.

    Raises:
        ValueError: if the per-stage available-memory-proportion list does
            not match the number of pipeline stages.
    """
    # build model
    if opts["groupbert"]:
        logger.info(
            f"************* Using GroupBERT model architecture *************")
        pipeline_model = bert_ipu.GroupBertModel(bert_config,
                                                 is_training=is_training)
    else:
        pipeline_model = bert_ipu.BertModel(bert_config,
                                            is_training=is_training)

    # build stages & device mapping
    computational_stages = build_squad_pipeline_stages(pipeline_model,
                                                       bert_config, opts,
                                                       is_training)
    device_mapping = opts['device_mapping']
    logger.info(
        f"************* computational stages: *************\n{computational_stages}"
    )
    logger.info(
        f"************* device mapping: *************\n{device_mapping}")

    # Set IPU-specific available memory proportion: one value per pipeline
    # stage — either a single float broadcast to all stages, or a per-device
    # value looked up through the device mapping.
    if isinstance(opts['available_memory_proportion'], float):
        available_memory_proportion_list = [
            str(opts['available_memory_proportion'])
        ] * len(device_mapping)
    else:
        available_memory_proportion_list = [
            str(opts['available_memory_proportion'][device])
            for device in device_mapping
        ]

    # Validate against the actual stage count (the list is built from the
    # device mapping, so comparing against device_mapping would be vacuous).
    if len(available_memory_proportion_list) != len(computational_stages):
        raise ValueError(
            "The available_memory_proportion list must be the same length as the number of stages in the pipeline."
        )

    # BUG FIX: the per-stage proportions computed above were previously
    # ignored — every stage got str(opts["available_memory_proportion"]),
    # which stringifies the whole list when a per-device list is configured.
    # Use each stage's own value instead.
    options = [
        ipu.pipelining_ops.PipelineStageOptions(
            matmul_options={
                "availableMemoryProportion": amp,
                "partialsType": opts["partials_type"]
            },
            convolution_options={"partialsType": opts["partials_type"]})
        for amp in available_memory_proportion_list
    ]

    if is_training:
        # define optimizer
        def optimizer_function(learning_rate, total_loss, *args):
            # Scale the loss for gradient computation; wrap in a
            # cross-replica optimizer when running data-parallel replicas.
            optimizer = get_optimizer(learning_rate, opts['loss_scaling'],
                                      opts['replicas'], opts)
            if opts["replicas"] > 1:
                optimizer = ipu.optimizers.cross_replica_optimizer.CrossReplicaOptimizer(
                    optimizer)
            return pipelining_ops.OptimizerFunctionOutput(
                optimizer, total_loss * opts['loss_scaling'])

        return pipelining_ops.pipeline(
            computational_stages=computational_stages,
            gradient_accumulation_count=opts['gradient_accumulation_count'],
            repeat_count=iterations_per_step,
            inputs=[learning_rate],
            infeed_queue=infeed,
            outfeed_queue=outfeed,
            device_mapping=device_mapping,
            forward_propagation_stages_poplar_options=options,
            backward_propagation_stages_poplar_options=options,
            offload_weight_update_variables=opts['variable_offloading'],
            optimizer_function=optimizer_function,
            recomputation_mode=ipu.ops.pipelining_ops.RecomputationMode[
                opts['recomputation_mode']],
            name="Pipeline")
    else:
        return pipelining_ops.pipeline(
            computational_stages=computational_stages,
            gradient_accumulation_count=opts['gradient_accumulation_count'],
            repeat_count=iterations_per_step,
            inputs=[],
            infeed_queue=infeed,
            outfeed_queue=outfeed,
            device_mapping=device_mapping,
            forward_propagation_stages_poplar_options=options,
            backward_propagation_stages_poplar_options=options,
            offload_weight_update_variables=opts['variable_offloading'],
            name="Pipeline")