Example #1
    def _create_master_weight(self, param):
        assert isinstance(self.helper, LayerHelper)

        var_name = param.name + "_fp32_master"
        var_name = unique_name.generate(var_name)
        var = layers.create_global_var(name=var_name,
                                       shape=param.shape,
                                       value=0,
                                       dtype='float32',
                                       persistable=True)
        block = self.helper.startup_program.global_block()
        block.append_op(type="cast",
                        inputs={"X": [param]},
                        outputs={"Out": [var]},
                        attrs={
                            "in_dtype": param.dtype,
                            "out_dtype": core.VarDesc.VarType.FP32
                        })
        self._master_weights[param.name] = var
        return var
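The snippet above keeps a persistable FP32 "master" copy of a (typically FP16) parameter so that optimizer updates are accumulated in full precision. A minimal NumPy sketch of the same idea, independent of Paddle (all names here are illustrative):

import numpy as np

# A hypothetical low-precision parameter and its FP32 master copy.
param_fp16 = np.random.randn(4).astype(np.float16)
master_fp32 = param_fp16.astype(np.float32)      # plays the role of the "cast" op above

grad_fp16 = np.random.randn(4).astype(np.float16)
lr = 0.1

# Update in FP32 so tiny updates are not rounded away ...
master_fp32 -= lr * grad_fp16.astype(np.float32)
# ... then cast back for the next low-precision forward pass.
param_fp16 = master_fp32.astype(np.float16)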
Example #2
    def __init__(self,
                 name_scope,
                 hidden_size,
                 param_attr=None,
                 bias_attr=None,
                 gate_activation=None,
                 activation=None,
                 dtype='float32'):
        super(BasicGRUUnit, self).__init__(name_scope, dtype)
        # reserve old school _full_name and _helper for static graph save load
        self._full_name = unique_name.generate(name_scope + "/" +
                                               self.__class__.__name__)
        self._helper = LayerObjectHelper(self._full_name)

        self._name = name_scope
        self._hiden_size = hidden_size
        self._param_attr = param_attr
        self._bias_attr = bias_attr
        self._gate_activation = gate_activation or layers.sigmoid
        self._activation = activation or layers.tanh
        self._dtype = dtype
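For reference, one common GRU-cell formulation that such a unit implements (gate_activation defaults to sigmoid, activation to tanh). This NumPy sketch is only illustrative and does not claim to reproduce BasicGRUUnit's exact parameter layout:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x, h, w_gate, b_gate, w_cand, b_cand):
    # Reset gate r and update gate u from the concatenated [x, h].
    xh = np.concatenate([x, h], axis=-1)
    r, u = np.split(sigmoid(xh @ w_gate + b_gate), 2, axis=-1)
    # Candidate state uses the reset-gated hidden state.
    c = np.tanh(np.concatenate([x, r * h], axis=-1) @ w_cand + b_cand)
    # Interpolate between the previous state and the candidate.
    return u * h + (1.0 - u) * c

input_size, hidden_size = 8, 8
x = np.random.randn(1, input_size)
h = np.zeros((1, hidden_size))
w_gate = np.random.randn(input_size + hidden_size, 2 * hidden_size)
b_gate = np.zeros(2 * hidden_size)
w_cand = np.random.randn(input_size + hidden_size, hidden_size)
b_cand = np.zeros(hidden_size)
h_next = gru_step(x, h, w_gate, b_gate, w_cand, b_cand)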
Example #3
 def __init__(self,
              name=None,
              quant_bits=8,
              dtype='float32',
              quant_on_weight=False):
     super(FakeQuantAbsMax, self).__init__()
     self._quant_bits = quant_bits
     self._name = name
     scale_prefix = "{}.scale".format(
         name) if name else 'quant_dequant.scale'
     self._scale_name = unique_name.generate(scale_prefix)
     if quant_on_weight:
         scale_attr = ParamAttr(name=self._scale_name,
                                initializer=Constant(0.0),
                                trainable=False)
         self._scale = self.create_parameter(shape=[1],
                                             attr=scale_attr,
                                             dtype=self._dtype)
         self._scale.stop_gradient = True
     else:
         self._scale = None
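FakeQuantAbsMax simulates quantization while keeping tensors in floating point: the scale is the maximum absolute value, and values are rounded onto the grid a quant_bits-wide integer could represent. A small NumPy sketch of that idea (the function name is illustrative, not Paddle's API):

import numpy as np

def fake_quant_abs_max(x, quant_bits=8):
    # The scale is the largest magnitude in the tensor.
    scale = np.abs(x).max()
    bnt = (1 << (quant_bits - 1)) - 1          # e.g. 127 for 8 bits
    # Quantize to integers, then immediately dequantize ("fake" quantization).
    q = np.round(x / scale * bnt)
    return q * scale / bnt, scale

x = np.array([0.02, -1.5, 0.7, 3.1], dtype=np.float32)
x_quant, scale = fake_quant_abs_max(x)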
Example #4
    def visit_Continue(self, node):
        loop_node_index = self._find_ancestor_loop_index(node)
        assert loop_node_index != -1, "SyntaxError: 'continue' outside loop"
        loop_node = self.ancestor_nodes[loop_node_index]

        # 1. Map the 'break/continue' stmt to a unique boolean variable V.
        variable_name = unique_name.generate(CONTINUE_NAME_PREFIX)

        # 2. Find the first ancestor block containing this 'break/continue'; a
        # block is a node containing a stmt list. Remove all stmts after the
        # 'break/continue' and set V to True there.
        first_block_index = self._remove_stmts_after_break_continue(
            node, variable_name, loop_node_index)

        # 3. Add 'if V' for stmts in ancestor blocks between the first one
        # (exclusive) and the ancestor loop (inclusive)
        self._replace_if_stmt(loop_node_index, first_block_index, variable_name)

        # 4. For 'continue', set V to False at the beginning of each loop iteration.
        assign_false_node = create_fill_constant_node(variable_name, False)
        loop_node.body.insert(0, assign_false_node)
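The four steps can be illustrated with a hand-written before/after pair; the flag name __continue_0 only stands in for whatever unique_name.generate(CONTINUE_NAME_PREFIX) would produce:

# Original dygraph code:
#
#     for i in range(5):
#         if i % 2 == 0:
#             continue
#         total += i
#
# Roughly equivalent code after the transformation:
total = 0
for i in range(5):
    __continue_0 = False              # step 4: reset the flag at the start of each iteration
    if i % 2 == 0:
        __continue_0 = True           # step 2: the flag replaces 'continue'
    if not __continue_0:              # step 3: guard the statements after 'continue'
        total += i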
Example #5
        def _create_node(nodes, api_type):
            assert len(
                nodes
            ) > 1, "The length of BoolOp should be at least 2, but received {}.".format(
                len(nodes))
            if len(nodes) > 2:
                # Creates logic_and/logic_or node recursively.
                pre_assign_node = _create_node(nodes[:2], api_type)
                nodes = [pre_assign_node] + nodes[2:]
            args = [ast_to_source_code(child) for child in nodes]
            new_node_str = "fluid.layers.logical_{}(x={}, y={})".format(
                api_type, args[0], args[1])
            # gast.parse returns Module(body=[Expr(value=...)])
            new_node = gast.parse(new_node_str).body[0].value
            logic_tensor_name = unique_name.generate(
                LOGIC_AND_PREFIX if 'and' in api_type else LOGIC_OR_PREFIX)
            assign_name, assign_node = create_assign_node(
                logic_tensor_name, new_node)
            self._new_assign_nodes.append(assign_node)

            return assign_name
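The recursion folds an n-ary BoolOp into nested binary calls, so `a and b and c` ends up as logical_and applied twice. A simplified, iterative sketch of the same nesting (the real transformer additionally assigns each intermediate result to a uniquely named temporary):

def nest_binary(terms, api_type):
    # Fold ['a', 'b', 'c'] into nested fluid.layers.logical_and / logical_or calls.
    expr = terms[0]
    for term in terms[1:]:
        expr = "fluid.layers.logical_{}(x={}, y={})".format(api_type, expr, term)
    return expr

print(nest_binary(["a", "b", "c"], "and"))
# fluid.layers.logical_and(x=fluid.layers.logical_and(x=a, y=b), y=c)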
Example #6
    def to_static_inputs(self, main_program):
        inputs = []
        block = main_program.global_block()
        for input_var in self.args:
            if isinstance(input_var, np.ndarray):
                feed_layer = block.create_var(
                    name=unique_name.generate('feed'),
                    shape=list(input_var.shape),
                    dtype=input_var.dtype,
                    is_data=True,
                    need_check_feed=False)
            elif isinstance(input_var, core.VarBase):
                feed_layer = block.create_var(
                    name=input_var.name,
                    shape=list(input_var.shape),
                    dtype=input_var.dtype,
                    stop_gradient=input_var.stop_gradient,
                    need_check_feed=False)
            else:
                feed_layer = input_var

            inputs.append(feed_layer)
        return inputs
Example #7
    def visit_Break(self, node):
        loop_node_index = self._find_ancestor_loop_index(node)
        assert loop_node_index != -1, "SyntaxError: 'break' outside loop"
        loop_node = self.ancestor_nodes[loop_node_index]

        # 1. Map the 'break/continue' stmt to a unique boolean variable V.
        variable_name = unique_name.generate(BREAK_NAME_PREFIX)

        # 2. Find the first ancestor block containing this 'break/continue'; a
        # block is a node containing a stmt list. Remove all stmts after the
        # 'break/continue' and set V to True there.
        first_block_index = self._remove_stmts_after_break_continue(
            node, variable_name, loop_node_index)

        # 3. Add 'if V' for stmts in ancestor blocks between the first one
        # (exclusive) and the ancestor loop (inclusive)
        self._replace_if_stmt(loop_node_index, first_block_index, variable_name)

        # 4. For 'break', add `not V` into the condition of the loop.
        assign_false_node = create_fill_constant_node(variable_name, False)
        self._add_stmt_before_cur_node(loop_node_index, assign_false_node)

        cond_var_node = gast.UnaryOp(
            op=gast.Not(),
            operand=gast.Name(
                id=variable_name,
                ctx=gast.Load(),
                annotation=None,
                type_comment=None))
        if isinstance(loop_node, gast.While):
            loop_node.test = gast.BoolOp(
                op=gast.And(), values=[loop_node.test, cond_var_node])
        elif isinstance(loop_node, gast.For):
            parent_node = self.ancestor_nodes[loop_node_index - 1]
            for_to_while = ForToWhileTransformer(parent_node, loop_node,
                                                 cond_var_node)
            for_to_while.transform()
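A hand-written before/after pair for 'break'; here the extra step is folding `not flag` into the loop condition (the name __break_0 is illustrative of BREAK_NAME_PREFIX):

# Original dygraph code:
#
#     while x < 10:
#         x += 1
#         if x == 5:
#             break
#
# Roughly equivalent code after the transformation:
x = 0
__break_0 = False                      # step 4: flag initialised before the loop
while x < 10 and not __break_0:        # step 4: 'not flag' joined into the test
    x += 1
    if x == 5:
        __break_0 = True               # step 2: the flag replaces 'break'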
Example #8
 def _create_persistable_tensor(self, name, type, dtype):
     return framework.default_main_program().current_block().create_var(
         name=unique_name.generate(name),
         type=type,
         dtype=dtype,
         persistable=True)
Example #9
    def visit_Return(self, node):
        cur_func_node = self.function_def[-1]
        return_name = unique_name.generate(RETURN_PREFIX)
        self.return_name[cur_func_node].append(return_name)
        max_return_length = self.pre_analysis.get_func_max_return_length(
            cur_func_node)
        parent_node_of_return = self.ancestor_nodes[-2]

        for ancestor_index in reversed(range(len(self.ancestor_nodes) - 1)):
            ancestor = self.ancestor_nodes[ancestor_index]
            cur_node = self.ancestor_nodes[ancestor_index + 1]
            if hasattr(
                    ancestor,
                    "body") and index_in_list(ancestor.body, cur_node) != -1:
                if cur_node == node:
                    self._replace_return_in_stmt_list(ancestor.body, cur_node,
                                                      return_name,
                                                      max_return_length,
                                                      parent_node_of_return)
                self._replace_after_node_to_if_in_stmt_list(
                    ancestor.body, cur_node, return_name,
                    parent_node_of_return)
            elif hasattr(ancestor, "orelse") and index_in_list(
                    ancestor.orelse, cur_node) != -1:
                if cur_node == node:
                    self._replace_return_in_stmt_list(ancestor.orelse,
                                                      cur_node, return_name,
                                                      max_return_length,
                                                      parent_node_of_return)
                self._replace_after_node_to_if_in_stmt_list(
                    ancestor.orelse, cur_node, return_name,
                    parent_node_of_return)

            # If return node in while loop, add `not return_name` in gast.While.test
            if isinstance(ancestor, gast.While):
                cond_var_node = gast.UnaryOp(op=gast.Not(),
                                             operand=gast.Name(
                                                 id=return_name,
                                                 ctx=gast.Load(),
                                                 annotation=None,
                                                 type_comment=None))
                ancestor.test = gast.BoolOp(
                    op=gast.And(), values=[ancestor.test, cond_var_node])
                continue

            # If the return node is in a for loop, convert the for loop to a while loop and add `not return_name` to its test
            if isinstance(ancestor, gast.For):
                cond_var_node = gast.UnaryOp(op=gast.Not(),
                                             operand=gast.Name(
                                                 id=return_name,
                                                 ctx=gast.Load(),
                                                 annotation=None,
                                                 type_comment=None))
                parent_node = self.ancestor_nodes[ancestor_index - 1]
                for_to_while = ForToWhileTransformer(parent_node, ancestor,
                                                     cond_var_node)
                new_stmts = for_to_while.transform()
                while_node = new_stmts[-1]
                self.ancestor_nodes[ancestor_index] = while_node

            if ancestor == cur_func_node:
                break
Example #10
def create_static_variable_gast_node(name):
    func_code = "{} = paddle.jit.dy2static\
        .data_layer_not_check(name='{}', shape=[-1], dtype='float32')".format(
        name, unique_name.generate(name))
    return gast.parse(func_code).body[0]
Example #11
def save_vars(executor,
              dirname,
              main_program=None,
              vars=None,
              predicate=None,
              filename=None):
    """
    This API saves specific variables in the `Program` to files.

    There are two ways to specify the variables to be saved: set the variables in
    a list and assign it to `vars`, or use the `predicate` function to select
    variables for which `predicate(variable) == True`. The first way has higher priority.

    `dirname` is used to specify the folder where variables are saved.
    If you prefer to save variables in separate files in the `dirname` folder,
    do not set `filename`. If you prefer to save all variables in a single file,
    use `filename` to specify it.

    Args:
        executor(Executor): The executor to run for saving variables.
        dirname(str, optional): The folder where variables are saved.
                            When you need to save the parameters to memory, set it to None.
        main_program(Program, optional): The program whose variables will be saved.
                                    If it is None, the default main program will
                                    be used automatically.
                                    Default: None
        vars(list[Variable], optional): The list contains all variables to be saved.
                                        Default: None
        predicate(function, optional): The function selects the variables that make
                                       `predicate(variable) == True`.
                                       Default: None
        filename(str, optional): If you prefer to save all variables in a single file,
                                 use `filename` to specify it. Otherwise, let `filename` be None.
                                 Default: None

    Returns:
        str: When saving parameters to a file, returns None.
             When saving parameters to memory, returns a binary string containing parameters.

    Raises:
        TypeError: If `main_program` is not an instance of Program nor None.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            main_prog = fluid.Program()
            startup_prog = fluid.Program()
            with fluid.program_guard(main_prog, startup_prog):
                data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False)
                w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
                b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b')
                hidden_w = fluid.layers.matmul(x=data, y=w)
                hidden_b = fluid.layers.elementwise_add(hidden_w, b)
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(startup_prog)

            # The first usage: use `vars` to set the saved variables.
            var_list = [w, b]
            path = "./my_paddle_vars"
            fluid.io.save_vars(executor=exe, dirname=path, vars=var_list,
                            filename="vars_file")
            # w and b will be saved in a file named "vars_file".

            # The second usage: use `predicate` to select the saved variable.
            def name_has_fc(var):
                res = "fc" in var.name
                return res
            param_path = "./my_paddle_model"
            fluid.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog, vars=None, predicate = name_has_fc)
            # all variables whose names contain "fc" are saved.
    """
    save_to_memory = False
    if dirname is None and filename is None:
        save_to_memory = True

    main_program = _get_valid_program(main_program)

    if vars is None:
        return save_vars(
            executor,
            main_program=main_program,
            dirname=dirname,
            vars=list(filter(predicate, main_program.list_vars())),
            filename=filename)
    else:
        params_var_name = unique_name.generate("saved_params")
        # give warning when there is no var in model
        if len(list(vars)) == 0:
            warnings.warn(
                "no variable in your model, please ensure there are any variables in your model to save"
            )
            return None

        save_program = Program()
        save_block = save_program.global_block()

        save_var_map = {}
        for each_var in vars:
            # NOTE: don't save the variable which type is RAW
            if each_var.type == core.VarDesc.VarType.RAW:
                continue
            new_var = _clone_var_in_block_(save_block, each_var)
            if filename is None and save_to_memory is False:
                save_file_path = os.path.join(
                    os.path.normpath(dirname), new_var.name)
                save_block.append_op(
                    type='save',
                    inputs={'X': [new_var]},
                    outputs={},
                    attrs={'file_path': os.path.normpath(save_file_path)})
            else:
                save_var_map[new_var.name] = new_var

        if filename is not None or save_to_memory:
            save_var_list = []
            for name in sorted(save_var_map.keys()):
                save_var_list.append(save_var_map[name])

            save_path = str()
            if save_to_memory is False:
                save_path = os.path.join(os.path.normpath(dirname), filename)

            saved_params = save_block.create_var(
                type=core.VarDesc.VarType.RAW, name=params_var_name)
            saved_params.desc.set_persistable(True)
            save_block.append_op(
                type='save_combine',
                inputs={'X': save_var_list},
                outputs={'Y': saved_params},
                attrs={
                    'file_path': save_path,
                    'save_to_memory': save_to_memory
                })

        #NOTE(zhiqiu): save op will add variable kLookupTablePath in save_program.desc,
        # which leads to diff on save_program and its desc. Call _sync_with_cpp
        # to keep consistency.
        save_program._sync_with_cpp()
        executor.run(save_program)
        if save_to_memory:
            return global_scope().find_var(params_var_name).get_bytes()
Example #12
    def get_for_stmt_nodes(self, node):
        # TODO: consider for - else in python
        if not self.name_visitor.is_control_flow_loop(node):
            return [node]

        # TODO: support non-range case
        range_call_node = self.get_for_range_node(node)
        if range_call_node is None:
            return [node]

        if not isinstance(node.target, gast.Name):
            return [node]
        iter_var_name = node.target.id

        init_stmt, cond_stmt, change_stmt = self.get_for_args_stmts(
            iter_var_name, range_call_node.args)

        loop_var_names, create_var_names = self.name_visitor.get_loop_var_names(
            node)
        new_stmts = []
        # Python can create a variable inside a loop and use it outside the loop, e.g.
        #
        # for x in range(10):
        #     y += x
        # print(x) # x = 10
        #
        # We need to create static variables for those variables.
        for name in create_var_names:
            if "." not in name:
                new_stmts.append(create_static_variable_gast_node(name))

        new_stmts.append(init_stmt)

        # `for x in range(10)` in dygraph should be converted into a static tensor condition such as `x + 1 <= 10`
        for name in loop_var_names:
            new_stmts.append(to_static_variable_gast_node(name))

        condition_func_node = gast.FunctionDef(
            name=unique_name.generate(FOR_CONDITION_PREFIX),
            args=gast.arguments(args=[
                gast.Name(id=name,
                          ctx=gast.Param(),
                          annotation=None,
                          type_comment=None) for name in loop_var_names
            ],
                                posonlyargs=[],
                                vararg=None,
                                kwonlyargs=[],
                                kw_defaults=None,
                                kwarg=None,
                                defaults=[]),
            body=[gast.Return(value=cond_stmt)],
            decorator_list=[],
            returns=None,
            type_comment=None)
        for name in loop_var_names:
            if "." in name:
                rename_transformer = RenameTransformer(condition_func_node)
                rename_transformer.rename(
                    name, unique_name.generate(GENERATE_VARIABLE_PREFIX))
        new_stmts.append(condition_func_node)

        new_body = node.body
        new_body.append(change_stmt)
        new_body.append(
            gast.Return(
                value=generate_name_node(loop_var_names, ctx=gast.Load())))
        body_func_node = gast.FunctionDef(
            name=unique_name.generate(FOR_BODY_PREFIX),
            args=gast.arguments(args=[
                gast.Name(id=name,
                          ctx=gast.Param(),
                          annotation=None,
                          type_comment=None) for name in loop_var_names
            ],
                                posonlyargs=[],
                                vararg=None,
                                kwonlyargs=[],
                                kw_defaults=None,
                                kwarg=None,
                                defaults=[]),
            body=new_body,
            decorator_list=[],
            returns=None,
            type_comment=None)
        for name in loop_var_names:
            if "." in name:
                rename_transformer = RenameTransformer(body_func_node)
                rename_transformer.rename(
                    name, unique_name.generate(GENERATE_VARIABLE_PREFIX))
        new_stmts.append(body_func_node)

        while_loop_node = create_while_node(condition_func_node.name,
                                            body_func_node.name,
                                            loop_var_names)
        new_stmts.append(while_loop_node)

        return new_stmts
Example #13
    def get_for_stmt_nodes(self, node):
        # TODO: consider for - else in python

        # 1. get key statements for different cases
        # NOTE 1: three key statements:
        #   1). init_stmts: list[node], nodes that prepare the for loop; there may be more than one
        #   2). cond_stmt: node, the condition node that decides whether to continue the loop
        #   3). body_stmts: list[node], the updated loop body; sometimes we must change
        #       the original statements in the body, not just append new ones
        #
        # NOTE 2: The following `for` statements will be transformed to `while` statements:
        #   1). for x in range(*)
        #   2). for x in iter_var
        #   3). for i, x in enumerate(*)

        current_for_node_parser = ForNodeVisitor(node)
        stmts_tuple = current_for_node_parser.parse()
        if stmts_tuple is None:
            return [node]
        init_stmts, cond_stmt, body_stmts = stmts_tuple

        # 2. get original loop vars
        loop_var_names, create_var_names = self.name_visitor.get_loop_var_names(
            node)
        # NOTE: in the 'for x in var' or 'for i, x in enumerate(var)' cases,
        # we need to append a new loop var and remove the unneeded one:
        #   1. for x in var -> x is not needed
        #   2. for i, x in enumerate(var) -> x is not needed
        if current_for_node_parser.is_for_iter(
        ) or current_for_node_parser.is_for_enumerate_iter():
            iter_var_name = current_for_node_parser.iter_var_name
            iter_idx_name = current_for_node_parser.iter_idx_name
            loop_var_names.add(iter_idx_name)
            if iter_var_name not in create_var_names:
                loop_var_names.remove(iter_var_name)

        # 3. prepare result statement list
        new_stmts = []
        # Python can create a variable inside a loop and use it outside the loop, e.g.
        #
        # for x in range(10):
        #     y += x
        # print(x) # x = 10
        #
        # We need to create static variables for those variables.
        for name in create_var_names:
            if "." not in name:
                new_stmts.append(create_static_variable_gast_node(name))

        # 4. append init statements
        new_stmts.extend(init_stmts)

        # 5. create & append condition function node
        condition_func_node = gast.FunctionDef(
            name=unique_name.generate(FOR_CONDITION_PREFIX),
            args=gast.arguments(
                args=[
                    gast.Name(
                        id=name,
                        ctx=gast.Param(),
                        annotation=None,
                        type_comment=None) for name in loop_var_names
                ],
                posonlyargs=[],
                vararg=None,
                kwonlyargs=[],
                kw_defaults=None,
                kwarg=None,
                defaults=[]),
            body=[gast.Return(value=cond_stmt)],
            decorator_list=[],
            returns=None,
            type_comment=None)
        for name in loop_var_names:
            if "." in name:
                rename_transformer = RenameTransformer(condition_func_node)
                rename_transformer.rename(
                    name, unique_name.generate(GENERATE_VARIABLE_PREFIX))
        new_stmts.append(condition_func_node)

        # 6. create & append loop body function node
        # append return values for loop body
        body_stmts.append(
            gast.Return(value=generate_name_node(
                loop_var_names, ctx=gast.Load(), gen_tuple_if_single=True)))
        body_func_node = gast.FunctionDef(
            name=unique_name.generate(FOR_BODY_PREFIX),
            args=gast.arguments(
                args=[
                    gast.Name(
                        id=name,
                        ctx=gast.Param(),
                        annotation=None,
                        type_comment=None) for name in loop_var_names
                ],
                posonlyargs=[],
                vararg=None,
                kwonlyargs=[],
                kw_defaults=None,
                kwarg=None,
                defaults=[]),
            body=body_stmts,
            decorator_list=[],
            returns=None,
            type_comment=None)
        for name in loop_var_names:
            if "." in name:
                rename_transformer = RenameTransformer(body_func_node)
                rename_transformer.rename(
                    name, unique_name.generate(GENERATE_VARIABLE_PREFIX))
        new_stmts.append(body_func_node)

        # 7. create & append while loop node
        while_loop_nodes = create_while_nodes(
            condition_func_node.name, body_func_node.name, loop_var_names)
        new_stmts.extend(while_loop_nodes)

        return new_stmts
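Putting the pieces together, a simple range-based for loop ends up as a condition function, a body function, and one while-loop call. A hand-written sketch of the shape of the generated code; the function names only mimic what the FOR_CONDITION_PREFIX / FOR_BODY_PREFIX unique names would look like, and the final call is written as a plain Python while loop instead of _jst.convert_while_loop:

# Original dygraph code:
#
#     for i in range(10):
#         total += i

def __for_loop_condition_0(i, total):
    return i < 10

def __for_loop_body_0(i, total):
    total += i
    i += 1
    return i, total

i, total = 0, 0
while __for_loop_condition_0(i, total):
    i, total = __for_loop_body_0(i, total)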
Example #14
def create_while_nodes(condition_name, body_name, loop_var_names):
    """
    Returns a list of gast.Node which represents the calling of Paddle
    controlflow while_loop.

    Usually, the list contains just one statement, such as:

    [a, b, c] = paddle.jit.dy2static.convert_while_loop(
            condition_name, body_name, [a, b, c])

    where a, b, c are in loop_var_names.

    However, if loop_var_names contains a property such as foo.x, we cannot
    assign to the property as an output of convert_while_loop because a Python
    property may be a read-only attribute. To handle that case, we replace the
    attributes which are outputs of convert_while_loop with generated
    variables; then, if the attribute turns out not to be read-only at runtime,
    we assign it. The created statements look like:

    [a, b, __attribute_variable_1] = paddle.jit.dy2static.convert_while_loop(
            condition_name, body_name, [a, b, foo.x])
    if not isinstance(getattr(type(foo), x, None), property): foo.x = __attribute_variable_1

    More than one such statement may be created, which is why the return type is
    a list of gast.Node.
    """
    # NOTE(liym27):
    # It's better to parse the source code into an AST node than to customize an AST node
    # including child nodes, because it is easy to mistake the ast node type when customizing the node.
    #
    # For example: loop_var_names = [a, b, foo.x], the type of `a` or `b` is gast.Name,
    # but the type of `foo.x` is gast.Attribute.

    unique_name_to_origin = {}
    # We have to keep loop_var_names and assign_loop_var_names in the same order;
    # a set has no order, so we convert it to a list.
    loop_var_names = list(loop_var_names)
    assign_loop_var_names = []
    for name in loop_var_names:
        if "." in name:
            # name is an attribute variable such as foo.x
            tmp_attr_name = unique_name.generate(ATTRIBUTE_VARIABLE_PREFIX)
            unique_name_to_origin[tmp_attr_name] = name
            assign_loop_var_names.append(tmp_attr_name)
        else:
            assign_loop_var_names.append(name)

    while_func_name = "_jst.convert_while_loop"
    while_node_str = "[{}] = {}({}, {}, [{}])".format(
        ",".join(assign_loop_var_names), while_func_name, condition_name,
        body_name, ",".join(loop_var_names))
    while_node = gast.parse(while_node_str).body[0]

    ret = [while_node]
    for tmp_attr_name in unique_name_to_origin:
        origin_attr_var = unique_name_to_origin[tmp_attr_name]
        dot_pos = origin_attr_var.rindex(".")
        obj_name = origin_attr_var[0:dot_pos]
        attr_name = origin_attr_var[dot_pos + 1:]
        assign_if_not_prop_str = "if not isinstance(getattr(type({}), '{}', None), property): {} = {}".format(
            obj_name, attr_name, origin_attr_var, tmp_attr_name)
        assign_if_not_prop_node = gast.parse(assign_if_not_prop_str).body[0]
        ret.append(assign_if_not_prop_node)
    return ret
Example #15
def init_communicator(startup_program, main_program, current_endpoint,
                      endpoints, ring_id):
    nranks = len(endpoints)
    other_endpoints = endpoints[:]
    other_endpoints.remove(current_endpoint)
    group_rank = endpoints.index(current_endpoint)
    assert group_rank >= 0

    block = startup_program.global_block()
    nccl_id_var = block.create_var(name=unique_name.generate('nccl_id'),
                                   persistable=True,
                                   type=core.VarDesc.VarType.RAW)
    block.append_op(type='c_gen_nccl_id',
                    inputs={},
                    outputs={'Out': nccl_id_var},
                    attrs={
                        'rank': group_rank,
                        'endpoint': current_endpoint,
                        'other_endpoints': other_endpoints,
                        OP_ROLE_KEY: OpRole.Forward,
                    })
    block.append_op(type='c_comm_init',
                    inputs={'X': nccl_id_var},
                    outputs={},
                    attrs={
                        'nranks': nranks,
                        'rank': group_rank,
                        'ring_id': ring_id,
                        OP_ROLE_KEY: OpRole.Forward,
                    })

    # add input op for test
    fill_var_name = "tensor@Filled"
    fill_var = block.create_var(name=fill_var_name,
                                shape=[10, 10],
                                dtype='float32',
                                persistable=False,
                                stop_gradient=True)
    block.append_op(type="fill_constant",
                    outputs={"Out": fill_var_name},
                    attrs={
                        "shape": [10, 10],
                        "dtype": fill_var.dtype,
                        "value": 1.0,
                        "place_type": 1
                    })

    with fluid.program_guard(main_program):
        op_type = "c_allreduce_sum"
        data = fluid.layers.fill_constant(shape=[1],
                                          dtype='float32',
                                          value=2.5)
        helper = LayerHelper(op_type, **locals())
        helper.append_op(type=op_type,
                         inputs={'X': [data]},
                         outputs={'Out': [data]},
                         attrs={
                             'ring_id': ring_id,
                             'use_calc_stream': True
                         })

    print("startup program:", startup_program)
    print("main program:", main_program)
Example #16
    def _scale_loss(self):

        main_block = paddle.static.default_main_program().global_block()
        main_block._sync_with_cpp()
        OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName()

        loss = self.get_attr("loss")
        assert loss is not None
        loss_op = loss.op
        loss_op_dist_attr = self.dist_context.get_op_dist_attr_for_program(
            loss_op)

        if loss.dtype != core.VarDesc.VarType.FP32:
            # Casting the loss here would change the effective loss tensor of the computation graph
            # and therefore affect all following passes whose logic is based on the loss tensor (Recompute & Gradient Merge),
            # so it is not allowed for now. To be fixed in the future.
            raise NotImplementedError(
                "Loss's generator op is not support in FP16 in Auto Parallel by now, please put that op into your black-list."
            )

            tmp_name = unique_name.generate(loss.name + ".cast_fp32")
            cast_loss = main_block.create_var(
                name=tmp_name, dtype=core.VarDesc.VarType.FP32)
            loss_dist_attr = self.dist_context.get_tensor_dist_attr_for_program(
                loss)
            ref_mesh = loss_op_dist_attr.process_mesh
            self.dist_context.set_tensor_dist_attr_for_program(
                cast_loss, loss_dist_attr)

            loss_op_idx = find_op_index(main_block.desc, loss_op.desc)
            cast_op = main_block._insert_op(
                loss_op_idx + 1,
                type='cast',
                inputs={'X': [loss]},
                outputs={'Out': [cast_loss]},
                attrs={
                    "in_dtype": loss.dtype,
                    "out_dtype": core.VarDesc.VarType.FP32,
                    'op_role': loss_op.all_attrs()[OP_ROLE_KEY],
                })

            loss_op._set_attr(OP_ROLE_KEY,
                              core.op_proto_and_checker_maker.OpRole.Forward)
            naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
                cast_op, ref_mesh, [-1], self.dist_context)
            loss = loss.astype('float32')

        if self.get_attr("use_dynamic_loss_scaling"
                         ) or self.get_attr("init_loss_scaling") != 1.0:

            loss_op_idx = find_op_index(main_block.desc, loss_op.desc)

            # forward
            ref_mesh = loss_op_dist_attr.process_mesh
            self._scaled_loss = main_block.create_var(
                name=unique_name.generate("scaled_loss"),
                shape=loss.shape,
                dtype=loss.dtype,
                persistable=loss.persistable)
            set_var_dist_attr(self.dist_context, self._scaled_loss, [-1],
                              ref_mesh)

            elementwise_mul_op = main_block._insert_op(
                loss_op_idx + 1,
                type='elementwise_mul',
                inputs={
                    'X': [loss],
                    'Y': [self._loss_scaling]
                },
                outputs={'Out': [self._scaled_loss]},
                attrs={
                    'op_role': loss_op.all_attrs()[OP_ROLE_KEY],
                })
            loss_op._set_attr(OP_ROLE_KEY,
                              core.op_proto_and_checker_maker.OpRole.Forward)
            naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
                elementwise_mul_op, ref_mesh, [-1], self.dist_context)

            # backward
            first_backward_op = main_block.ops[loss_op_idx + 2]
            assert first_backward_op.type == "fill_constant" and int(
                first_backward_op.all_attrs()[OP_ROLE_KEY]) == 257
            self._scaled_loss_grad = main_block.create_var(
                name=unique_name.generate("scaled_loss") + "@GRAD",
                shape=loss.shape,
                dtype=loss.dtype,
                persistable=loss.persistable)
            set_var_dist_attr(self.dist_context, self._scaled_loss_grad, [-1],
                              ref_mesh)
            pre_grad_name = first_backward_op.output_arg_names[0]
            first_backward_op._rename_output(pre_grad_name,
                                             self._scaled_loss_grad.name)
            # FIXME(JZ-LIANG) a trick to insert backward op
            main_block._sync_with_cpp()
            elementwise_mul_grad_op_desc = main_block.desc._insert_op(
                loss_op_idx + 3)
            elementwise_mul_grad_op_desc.set_type("elementwise_mul_grad")
            elementwise_mul_grad_op_desc.set_input(
                'Out@GRAD', [self._scaled_loss_grad.name])
            elementwise_mul_grad_op_desc.set_input('X', [loss.name])
            elementwise_mul_grad_op_desc.set_input('Y',
                                                   [self._loss_scaling.name])
            elementwise_mul_grad_op_desc.set_output('X@GRAD', [pre_grad_name])
            elementwise_mul_grad_op_desc.set_output('Y@GRAD', [])
            elementwise_mul_grad_op_desc._set_attr(
                OP_ROLE_KEY, core.op_proto_and_checker_maker.OpRole.Backward)
            elementwise_mul_grad_op_desc._set_attr('axis', -1)
            elementwise_mul_grad_op = paddle.fluid.framework.Operator(
                main_block, elementwise_mul_grad_op_desc)
            main_block.ops.insert(loss_op_idx + 3, elementwise_mul_grad_op)
            main_block._sync_with_cpp()
            elementwise_mul_grad_op = main_block.ops[loss_op_idx + 3]
            assert elementwise_mul_grad_op.type == "elementwise_mul_grad"
            naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
                elementwise_mul_grad_op, ref_mesh, [-1], self.dist_context)

        else:
            self._scaled_loss = loss

        main_block._sync_with_cpp()
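The inserted elementwise_mul multiplies the loss by a scaling factor so that small FP16 gradients do not underflow, and the matching elementwise_mul_grad removes the factor again on the backward side. A minimal NumPy illustration of why the trick helps (the numbers are only illustrative):

import numpy as np

loss_scaling = np.float32(1024.0)
tiny_grad = np.float32(1e-8)

# Cast a tiny gradient straight to FP16: it underflows to zero.
print(np.float16(tiny_grad))                      # 0.0

# Scale first (as the inserted multiply does for the loss), cast,
# then divide the scale back out in FP32.
scaled = np.float16(tiny_grad * loss_scaling)     # representable in FP16
print(np.float32(scaled) / loss_scaling)          # ~1e-08 recovered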
Example #17
    def __call__(self, var, block):
        """Add xavier initialization ops for a variable
        Args:
            var: Variable that needs to be initialized
            block: The block in which initialization ops
                   should be added
        Returns:
            the initialization op
        """
        assert isinstance(block, framework.Block)
        check_variable_and_dtype(var, "Out", ["int64"], "xavier_init")

        if (var.dtype != VarDesc.VarType.INT64):
            raise ValueError("Only 'int64' dtype is supported in paddlefl's initializer.")

        f_in, f_out = self._compute_fans(var)

        # If fan_in and fan_out are passed, use them
        fan_in = f_in if self._fan_in is None else self._fan_in
        fan_out = f_out if self._fan_out is None else self._fan_out

        if self._seed == 0:
            self._seed = block.program.random_seed

        # create tmp vars:
        # out_var for the random number, shape = (1, ...)
        # out_expand_var for the encrypted random number, shape = (2, ...), the same as var's shape
        out_dtype = VarDesc.VarType.FP32
        shape_ = list(var.shape)
        shape_[0] = 1
        out_var = block.create_var(
            name=unique_name.generate(".".join(
                ['gaussian_random', var.name, 'tmp'])),
            shape=shape_,
            dtype=out_dtype,
            type=VarDesc.VarType.LOD_TENSOR,
            persistable=False)

        out_expand_var = block.create_var(
            name=unique_name.generate(".".join(
                ['gaussian_random_expand', var.name, 'tmp'])),
            shape=out_var.shape,
            dtype=out_dtype,
            type=VarDesc.VarType.LOD_TENSOR,
            persistable=False)

        if self._uniform:
            limit = np.sqrt(6.0 / float(fan_in + fan_out))
            op = block._prepend_op(
                type="uniform_random",
                inputs={},
                outputs={"Out": out_var},
                attrs={
                    "shape": out_var.shape,
                    "dtype": out_dtype,
                    "min": -limit,
                    "max": limit,
                    "seed": self._seed
                },
                stop_gradient=True)
        else:
            std = np.sqrt(2.0 / float(fan_in + fan_out))
            op = block._prepend_op(
                type="gaussian_random",
                outputs={"Out": out_var},
                attrs={
                    "shape": out_var.shape,
                    "dtype": out_dtype,
                    "mean": 0.0,
                    "std": std,
                    "seed": self._seed
                },
                stop_gradient=True)

        # convert plaintext into cyphertext
        block.append_op(
            type="scale",
            inputs={"X": out_var},
            outputs={"Out": out_var},
            attrs={"scale": float(mdu.mpc_one_share)})

        # extend one share to two share
        block.append_op(
            type="concat",
            inputs={"X": [out_var, out_var]},
            outputs={"Out": [out_expand_var]},
            attrs={"axis": 0})

        # cast float into int64
        block.append_op(
            type="cast",
            inputs={"X": out_expand_var},
            outputs={"Out": var},
            attrs={"in_dtype": out_expand_var.dtype,
                   "out_dtype": var.dtype})

        if not framework.in_dygraph_mode():
            var.op = op
        return op
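The uniform and normal branches above use the usual Xavier/Glorot bounds, limit = sqrt(6 / (fan_in + fan_out)) and std = sqrt(2 / (fan_in + fan_out)). A plain NumPy sketch of just that part, without the MPC-specific share scaling, concatenation and int64 cast:

import numpy as np

def xavier_init(shape, fan_in, fan_out, uniform=True, rng=None):
    rng = rng or np.random.default_rng()
    if uniform:
        limit = np.sqrt(6.0 / float(fan_in + fan_out))
        return rng.uniform(-limit, limit, size=shape).astype(np.float32)
    std = np.sqrt(2.0 / float(fan_in + fan_out))
    return rng.normal(0.0, std, size=shape).astype(np.float32)

w = xavier_init((784, 200), fan_in=784, fan_out=200)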
Example #18
    def create_mpc_parameter(self,
                             attr,
                             shape,
                             dtype,
                             is_bias=False,
                             default_initializer=None,
                             stop_gradient=False,
                             type=core.VarDesc.VarType.LOD_TENSOR):
        """
        Create mpc parameters for this layer.
        Refer to LayerHelper.create_parameter in Paddle 1.7.
        :param attr:
        :param shape:
        :param dtype:
        :param is_bias:
        :param default_initializer:
        :param stop_gradient:
        :param type:
        :return:
        """
        # Deepcopy the attr so that parameters can be shared in program
        attr = copy.deepcopy(attr)
        attr = ParamAttr._to_attr(attr)
        if not attr:
            return None
        assert isinstance(attr, ParamAttr)
        suffix = 'b' if is_bias else 'w'
        if attr.name is None:
            attr.name = unique_name.generate(".".join([self.name, suffix]))

        if default_initializer is None and attr.initializer is None:
            if isinstance(dtype, core.VarDesc.VarType):
                if dtype != core.VarDesc.VarType.INT64:
                    raise TypeError(
                        "Can not create mpc parameter with default initializer "
                        "when dtype is not int64 type. Set default_initializer "
                        "to fit the parameter dtype!")
            else:
                if not dtype == "int64":
                    raise TypeError(
                        "Can not create mpc parameter with default initializer when "
                        "dtype is not int64 type. Set default_initializer to "
                        "fit the parameter dtype!")
            if is_bias:
                attr._set_default_bias_initializer()
            else:
                attr._set_default_initializer(XavierInitializer(seed=65536))
        else:
            attr._set_default_initializer(default_initializer)

        # TODO(xukun07): not support WeightNormParamAttr in this first version
        # Paddle1.7: If weight normalization is set, insert extra parameters and ops.
        # Refer to https://arxiv.org/pdf/1602.07868.pdf
        if isinstance(attr, WeightNormParamAttr):
            # param = self._create_weight_normalize(attr, shape, dtype)
            # WeightNormParamAttr.params_with_weight_norm.append(param)
            # return param
            raise NotImplementedError(
                "The WeightNormParamAttr for attr is not "
                "supported in this version")

        startup_program_global_block = self.startup_program.global_block()
        create_mpc_parameter(
            block=startup_program_global_block,
            dtype=dtype,
            shape=shape,
            type=type,
            **attr._to_kwargs(with_initializer=True))
        main_program_global_block = self.main_program.global_block()
        return create_mpc_parameter(
            block=main_program_global_block,
            dtype=dtype,
            shape=shape,
            type=type,
            **attr._to_kwargs())
Example #19
    def get_while_stmt_nodes(self, node):
        loop_var_names, create_var_names = self.name_visitor.get_loop_var_names(
            node)
        new_stmts = []

        # Python can create a variable inside a loop and use it outside the loop, e.g.
        #
        # while x < 10:
        #     x += 1
        #     y = x
        # z = y
        #
        # We need to create static variables for those variables.
        for name in create_var_names:
            if "." not in name:
                new_stmts.append(create_static_variable_gast_node(name))

        condition_func_node = gast.FunctionDef(
            name=unique_name.generate(WHILE_CONDITION_PREFIX),
            args=gast.arguments(
                args=[
                    gast.Name(
                        id=name,
                        ctx=gast.Param(),
                        annotation=None,
                        type_comment=None) for name in loop_var_names
                ],
                posonlyargs=[],
                vararg=None,
                kwonlyargs=[],
                kw_defaults=None,
                kwarg=None,
                defaults=[]),
            body=[gast.Return(value=node.test)],
            decorator_list=[],
            returns=None,
            type_comment=None)

        for name in loop_var_names:
            if "." in name:
                rename_transformer = RenameTransformer(condition_func_node)
                rename_transformer.rename(
                    name, unique_name.generate(GENERATE_VARIABLE_PREFIX))
        new_stmts.append(condition_func_node)

        new_body = node.body
        new_body.append(
            gast.Return(value=generate_name_node(
                loop_var_names, ctx=gast.Load(), gen_tuple_if_single=True)))
        body_func_node = gast.FunctionDef(
            name=unique_name.generate(WHILE_BODY_PREFIX),
            args=gast.arguments(
                args=[
                    gast.Name(
                        id=name,
                        ctx=gast.Param(),
                        annotation=None,
                        type_comment=None) for name in loop_var_names
                ],
                posonlyargs=[],
                vararg=None,
                kwonlyargs=[],
                kw_defaults=None,
                kwarg=None,
                defaults=[]),
            body=new_body,
            decorator_list=[],
            returns=None,
            type_comment=None)
        for name in loop_var_names:
            if "." in name:
                rename_transformer = RenameTransformer(body_func_node)
                rename_transformer.rename(
                    name, unique_name.generate(GENERATE_VARIABLE_PREFIX))
        new_stmts.append(body_func_node)

        while_loop_nodes = create_while_nodes(
            condition_func_node.name, body_func_node.name, loop_var_names)
        new_stmts.extend(while_loop_nodes)
        return new_stmts
Example #20
    def get_while_stmt_nodes(self, node):
        # TODO: consider while - else in python
        if not self.name_visitor.is_control_flow_loop(node):
            return [node]

        loop_var_names, create_var_names = self.name_visitor.get_loop_var_names(
            node)
        new_stmts = []

        # Python can create a variable inside a loop and use it outside the loop, e.g.
        #
        # while x < 10:
        #     x += 1
        #     y = x
        # z = y
        #
        # We need to create static variables for those variables.
        for name in create_var_names:
            if "." not in name:
                new_stmts.append(create_static_variable_gast_node(name))

        # `while x < 10` in dygraph should be converted into `static tensor < 10`
        for name in loop_var_names:
            new_stmts.append(to_static_variable_gast_node(name))

        logical_op_transformer = LogicalOpTransformer(node.test)
        cond_value_node = logical_op_transformer.transform()

        condition_func_node = gast.FunctionDef(
            name=unique_name.generate(WHILE_CONDITION_PREFIX),
            args=gast.arguments(args=[
                gast.Name(id=name,
                          ctx=gast.Param(),
                          annotation=None,
                          type_comment=None) for name in loop_var_names
            ],
                                posonlyargs=[],
                                vararg=None,
                                kwonlyargs=[],
                                kw_defaults=None,
                                kwarg=None,
                                defaults=[]),
            body=[gast.Return(value=cond_value_node)],
            decorator_list=[],
            returns=None,
            type_comment=None)
        for name in loop_var_names:
            if "." in name:
                rename_transformer = RenameTransformer(condition_func_node)
                rename_transformer.rename(
                    name, unique_name.generate(GENERATE_VARIABLE_PREFIX))
        new_stmts.append(condition_func_node)

        new_body = node.body
        new_body.append(
            gast.Return(
                value=generate_name_node(loop_var_names, ctx=gast.Load())))
        body_func_node = gast.FunctionDef(
            name=unique_name.generate(WHILE_BODY_PREFIX),
            args=gast.arguments(args=[
                gast.Name(id=name,
                          ctx=gast.Param(),
                          annotation=None,
                          type_comment=None) for name in loop_var_names
            ],
                                posonlyargs=[],
                                vararg=None,
                                kwonlyargs=[],
                                kw_defaults=None,
                                kwarg=None,
                                defaults=[]),
            body=new_body,
            decorator_list=[],
            returns=None,
            type_comment=None)
        for name in loop_var_names:
            if "." in name:
                rename_transformer = RenameTransformer(body_func_node)
                rename_transformer.rename(
                    name, unique_name.generate(GENERATE_VARIABLE_PREFIX))
        new_stmts.append(body_func_node)

        while_loop_node = create_while_node(condition_func_node.name,
                                            body_func_node.name,
                                            loop_var_names)
        new_stmts.append(while_loop_node)
        return new_stmts
Example #21
 def create_new_para_name(attr):
     if attr:
         assert attr.name, "attr should have a name already!"
         name_key = 'PARL_target_' + attr.name
         attr.name = unique_name.generate(name_key)
Example #22
    def fp16_compression(param_and_grads):
        """
        Compress fp32 gradients to fp16 during allreduce.
        """
        op_maker = core.op_proto_and_checker_maker

        new_param_and_grads = []  # param, grad, is_cast
        # cast grad from fp32->fp16 before allreduce,
        for param, grad in param_and_grads:
            if grad is None or grad.dtype != core.VarDesc.VarType.FP32:
                new_param_and_grads.append((param, grad, False))
                continue

            op = grad.op
            block = grad.block
            var_attr = op.all_attrs()[op_maker.kOpRoleVarAttrName()]
            if param.name not in var_attr:
                new_param_and_grads.append((param, grad, False))
                continue

            # remove (param, grad) from op_role_var
            var_attr.remove(param.name)
            var_attr.remove(grad.name)
            if len(var_attr) > 1:
                op._set_attr(op_maker.kOpRoleVarAttrName(), var_attr)
            else:
                op._remove_attr(op_maker.kOpRoleVarAttrName())

            new_grad = block.create_var(
                name=unique_name.generate(grad.name + ".cast_fp16"),
                dtype=core.VarDesc.VarType.FP16,
                persistable=False,
                stop_gradient=True)

            with block.program._backward_role_guard():
                cast_op = block.append_op(type="cast",
                                          inputs={"X": grad},
                                          outputs={"Out": new_grad},
                                          attrs={
                                              "in_dtype":
                                              core.VarDesc.VarType.FP32,
                                              "out_dtype":
                                              core.VarDesc.VarType.FP16
                                          },
                                          stop_gradient=True)

                backward = op_maker.OpRole.Backward
                cast_op._set_attr(op_maker.kOpRoleAttrName(), backward)
                cast_op._set_attr(op_maker.kOpRoleVarAttrName(),
                                  [param.name, new_grad.name])
                new_grad.op = cast_op

            new_param_and_grads.append((param, new_grad, True))

        ret_param_and_grads = []
        # cast grad from fp16->fp32 after allreduce.
        # NOTE: We split fp16 compression into two for loops;
        # if we do not separate them, fused allreduce goes wrong.
        # This is probably a problem of the fuse-allreduce pass and needs
        # to be fixed in the future.
        for param, grad, cast in new_param_and_grads:
            if not cast:
                ret_param_and_grads.append((param, grad))
                continue

            block = grad.block
            new_grad = block.create_var(
                name=unique_name.generate(grad.name + ".cast_fp32"),
                dtype=core.VarDesc.VarType.FP32,
                persistable=False,
                stop_gradient=True)

            with block.program._optimized_guard(
                [param, grad]), framework.name_scope('fp16_allreduce'):
                cast_op = block.append_op(type="cast",
                                          inputs={"X": grad},
                                          outputs={"Out": new_grad},
                                          attrs={
                                              "in_dtype":
                                              core.VarDesc.VarType.FP16,
                                              "out_dtype":
                                              core.VarDesc.VarType.FP32
                                          },
                                          stop_gradient=True)
            ret_param_and_grads.append((param, new_grad))

        return ret_param_and_grads
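fp16_compression halves allreduce traffic by casting each FP32 gradient to FP16 before communication and casting the reduced result back to FP32 afterwards. A NumPy sketch of the round trip, with the allreduce simulated by a plain sum over workers (names are illustrative):

import numpy as np

def fp16_compressed_allreduce(grads_fp32):
    # Cast fp32 -> fp16 before the "allreduce" ...
    grads_fp16 = [g.astype(np.float16) for g in grads_fp32]
    reduced = np.sum(grads_fp16, axis=0, dtype=np.float16)
    # ... and cast the reduced result back to fp32 afterwards.
    return reduced.astype(np.float32)

worker_grads = [np.random.randn(4).astype(np.float32) * 1e-2 for _ in range(4)]
approx = fp16_compressed_allreduce(worker_grads)
exact = np.sum(worker_grads, axis=0)
print(np.max(np.abs(approx - exact)))   # small fp16 rounding error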
Example #23
    def _update_name_to_var_shape(self, node):
        def replace_dot(name):
            # replace all '.' with '_'
            return name.replace('.', '_')

        assert isinstance(node, gast.Assign)
        target_node = node.targets[0]
        value_node = node.value

        update_static_shape_var_node = None
        if isinstance(target_node, gast.Tuple):
            update_static_shape_var_node = []
            for idx, element in enumerate(target_node.elts):
                target_id = ast_to_source_code(element).strip()

                if isinstance(value_node, gast.Name):
                    if value_node.id in self.name_to_var_shape:
                        # TODO(zhhsplendid): is context a problem for the result node of gast.parse?
                        static_shape_var_name = unique_name.generate(
                            replace_dot(target_id) +
                            STATIC_CONVERT_VAR_SHAPE_SUFFIX)
                        static_shape_var_node = gast.parse(
                            static_shape_var_name).body[0].value

                        static_shape_value_name = self.name_to_var_shape[
                            value_node.id]

                        sub_node_str = "{}[{}]".format(static_shape_value_name,
                                                       idx)
                        sub_node = gast.parse(sub_node_str).body[0].value

                        update_static_shape_var_node.append(
                            gast.Assign(
                                targets=[static_shape_var_node],
                                value=sub_node))

                        self.name_to_var_shape[
                            target_id] = static_shape_var_name
                if isinstance(value_node, gast.Attribute):
                    if self._is_var_shape(value_node):  # eg: x.shape
                        static_shape_var_name = unique_name.generate(
                            replace_dot(target_id) +
                            STATIC_CONVERT_VAR_SHAPE_SUFFIX)
                        static_shape_var_node = gast.parse(
                            static_shape_var_name).body[0].value

                        static_shape_value_node = copy.deepcopy(value_node)
                        # x.shape becomes convert_var_shape_simple(x)
                        static_shape_value_node = ShapeAttributeTransformer(
                        ).visit(static_shape_value_node)

                        sub_node_str = "{}[{}]".format(
                            ast_to_source_code(static_shape_value_node).strip(),
                            idx)
                        sub_node = gast.parse(sub_node_str).body[0].value
                        # Note(Aurelius84): Because static_shape_var_name is used in
                        # eval_if_exist_else_none() as a plain string, it will not
                        # be parsed as an argument in convert_loop/ifelse. We declare
                        # it as a global var because it has a unique name.
                        update_static_shape_var_node.append(
                            gast.Global(names=[static_shape_var_name]))

                        update_static_shape_var_node.append(
                            gast.Assign(
                                targets=[static_shape_var_node],
                                value=sub_node))
                        self.name_to_var_shape[
                            target_id] = static_shape_var_name
            return update_static_shape_var_node
        else:
            target_id = ast_to_source_code(target_node).strip()
            if isinstance(value_node, gast.Name):
                if value_node.id in self.name_to_var_shape:
                    static_shape_var_name = unique_name.generate(
                        replace_dot(target_id) +
                        STATIC_CONVERT_VAR_SHAPE_SUFFIX)
                    static_shape_var_node = gast.parse(
                        static_shape_var_name).body[0].value
                    static_shape_value_name = self.name_to_var_shape[
                        value_node.id]
                    static_shape_value_node = gast.parse(
                        static_shape_value_name).body[0].value

                    update_static_shape_var_node = [
                        gast.Assign(
                            targets=[static_shape_var_node],
                            value=static_shape_value_node)
                    ]
                    self.name_to_var_shape[target_id] = static_shape_var_name
            elif self._is_var_shape(value_node):  # eg: x.shape or x.shape[0]
                static_shape_var_name = unique_name.generate(
                    replace_dot(target_id) + STATIC_CONVERT_VAR_SHAPE_SUFFIX)
                static_shape_var_node = gast.parse(static_shape_var_name).body[
                    0].value
                static_shape_value_node = copy.deepcopy(value_node)
                # x.shape becomes convert_var_shape_simple(x)
                static_shape_value_node = ShapeAttributeTransformer().visit(
                    static_shape_value_node)
                # Declare static_shape_var_name as global var
                update_static_shape_var_node = [
                    gast.Global(names=[static_shape_var_name])
                ]
                update_static_shape_var_node.append(
                    gast.Assign(
                        targets=[static_shape_var_node],
                        value=static_shape_value_node))
                self.name_to_var_shape[target_id] = static_shape_var_name
        return update_static_shape_var_node
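
A minimal sketch of the gast.parse pattern the transformer relies on to turn formatted source strings into AST nodes (the names below are made up for illustration):

import gast

sub_node_str = "{}[{}]".format("shape_of_x", 0)  # hypothetical shape-variable name
# gast.parse mirrors ast.parse; body[0] is an Expr whose .value is the expression node.
sub_node = gast.parse(sub_node_str).body[0].value
print(type(sub_node).__name__)  # 'Subscript'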
Example No. 24
0
 def _get_offload_var_name(self, name):
     return unique_name.generate(name + '@offload')
Example No. 25
0
 def _create_persistable_tensor(self, name, type, dtype):
     return framework.default_main_program().current_block().create_var(
         name=unique_name.generate(name),
         type=type,
         dtype=dtype,
         persistable=True)
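
A minimal standalone sketch of the same pattern outside the class, assuming static-graph mode and an arbitrary base name:

import paddle
from paddle.fluid import core, framework, unique_name

paddle.enable_static()
block = framework.default_main_program().current_block()
# unique_name.generate avoids clashes if the helper is called more than once.
var = block.create_var(name=unique_name.generate('moment_acc'),
                       type=core.VarDesc.VarType.LOD_TENSOR,
                       dtype='float32',
                       persistable=True)
print(var.name)  # e.g. 'moment_acc_0'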
Example No. 26
0
    def _shard_parameter(self, main_block, startup_block):

        if self.stage < 3:
            return

        dp_ring_ids = [group.id for group in self.dp_groups]
        for sharding_info in self.sharding_infos:
            need_broadcast_vars, param_usage = sharding_info.get_broadcast_vars_and_param_usage(
                main_block)
            not_used_param_name = []
            for param_name in param_usage:
                if param_usage[param_name] == 0 and sharding_info.get_var_rank(
                        param_name) != sharding_info.local_rank:
                    not_used_param_name.append(param_name)

            for idx, op in reversed(list(enumerate(main_block.ops))):
                if is_optimizer_op(op):
                    continue

                for input_name in op.desc.input_arg_names():
                    if op.type == "cast":
                        continue
                    if input_name not in need_broadcast_vars:
                        continue
                    root_rank = sharding_info.get_var_rank(input_name)
                    if root_rank == sharding_info.local_rank:
                        broadcast_varname = input_name
                    else:
                        broadcast_varname = unique_name.generate(input_name +
                                                                 "@BroadCast")
                        input_var = main_block.var(input_name)
                        new_var = main_block.create_var(name=broadcast_varname,
                                                        shape=input_var.shape,
                                                        dtype=input_var.dtype,
                                                        persistable=False)
                        ref_dist_attr = self._dist_context.get_tensor_dist_attr_for_program(
                            input_var)
                        out_var_dist_attr = set_var_dist_attr(
                            self._dist_context, new_var,
                            ref_dist_attr.dims_mapping,
                            ref_dist_attr.process_mesh)
                        op._rename_input(input_name, broadcast_varname)

                    _insert_init_and_broadcast_op(main_block, idx,
                                                  broadcast_varname,
                                                  sharding_info.local_rank,
                                                  root_rank,
                                                  sharding_info.group.id,
                                                  op.attr('op_role'),
                                                  self._dist_context)

            for idx, op in reversed(list(enumerate(main_block.ops))):
                if op.type != "cast":
                    continue
                input_name = op.input_arg_names[0]
                output_name = op.output_arg_names[0]
                if input_name in not_used_param_name:
                    main_block._remove_op(idx, sync=False)
                    main_block._remove_var(output_name, sync=False)

            for idx, op in reversed(list(enumerate(startup_block.ops))):
                assert len(op.output_arg_names) == 1
                output_name = op.output_arg_names[0]

                if op.type == "c_broadcast" and op.attr(
                        "ring_id") in dp_ring_ids:
                    if self.outer_dp_group and sharding_info.get_var_rank(
                            output_name) == sharding_info.local_rank:
                        op._set_attr("ring_id", self.outer_dp_group.id)
                    else:
                        startup_block._remove_op(idx, sync=False)
                    continue

                if op.type != "c_broadcast" and output_name in param_usage and sharding_info.get_var_rank(
                        output_name) != sharding_info.local_rank:
                    startup_block._remove_op(idx, sync=False)

            for param_name in param_usage:
                if sharding_info.get_var_rank(
                        param_name) != sharding_info.local_rank:
                    main_block._remove_var(param_name, sync=False)
                    startup_block._remove_var(param_name, sync=False)

        main_block._sync_with_cpp()
        startup_block._sync_with_cpp()
Example No. 27
0
    def __init__(self,
                 block,
                 type=core.VarDesc.VarType.LOD_TENSOR,
                 name=None,
                 shape=None,
                 dtype=None,
                 lod_level=None,
                 capacity=None,
                 persistable=None,
                 error_clip=None,
                 stop_gradient=False,
                 is_data=False,
                 need_check_feed=False,
                 belong_to_optimizer=False,
                 **kwargs):
        self.block = block
        if name is None:
            name = unique_name.generate('_generated_var')

        if dtype is not None:
            if not isinstance(dtype, core.VarDesc.VarType):
                dtype = convert_np_dtype_to_dtype_(dtype)

        self.belong_to_optimizer = belong_to_optimizer

        self.error_clip = error_clip

        is_new_var = False
        name = cpt.to_text(name)
        self.desc = self.block.desc.find_var(cpt.to_bytes(name))

        if self.desc is None:
            self.desc = self.block.desc.var(cpt.to_bytes(name))
            is_new_var = True

        if is_new_var:
            self.desc.set_type(type)
        elif self.desc.type() != type:
            raise ValueError("MpcVariable {0} has been created before. The "
                             "previous type is {1}; the new type is {2}. They"
                             " are not matched".format(self.name,
                                                       self.desc.type(), type))
        if shape is not None:
            if is_new_var:
                # resize the shape for MpcVariable
                mpc_shape = list(shape)
                mpc_shape.insert(0, 2)
                self.desc.set_shape(mpc_shape)
            else:
                old_shape = self.shape
                shape = tuple(shape)
                if shape != old_shape:
                    raise ValueError(
                        "MpcVariable {0} has been created before. the previous "
                        "shape is {1}; the new shape is {2}. They are not "
                        "matched.".format(self.name, old_shape, shape))
        if dtype is not None:
            if is_new_var:
                self.desc.set_dtype(dtype)
            else:
                old_dtype = self.dtype
                if dtype != old_dtype:
                    raise ValueError(
                        "MpcVariable {0} has been created before. "
                        "The previous data type is {1}; the new "
                        "data type is {2}. They are not "
                        "matched.".format(self.name, old_dtype, dtype))

        if lod_level is not None:
            if is_new_var:
                self.desc.set_lod_level(lod_level)
            else:
                if lod_level != self.lod_level:
                    raise ValueError(
                        "MpcVariable {0} has been created before. "
                        "The previous lod_level is {1}; the new "
                        "lod_level is {2}. They are not "
                        "matched".format(self.name, self.lod_level, lod_level))
        if persistable is not None:
            if is_new_var:
                self.desc.set_persistable(persistable)
            else:
                if persistable != self.persistable:
                    raise ValueError(
                        "MpcVariable {0} has been created before."
                        "The previous persistable is {1}; the new "
                        "persistable is {2}. They are not matched".format(
                            self.name, self.persistable, persistable))

        if need_check_feed and is_new_var:
            self.desc.set_need_check_feed(need_check_feed)

        if capacity is not None:
            if is_new_var:
                self.desc.set_capacity(capacity)
            else:
                # TODO(abhinavarora) by Paddle 1.7: compare with the set capacity
                # once get_capacity is implemented.
                pass

        self.block.vars[name] = self
        self.op = None
        self._stop_gradient = stop_gradient
        self.is_data = is_data
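
A tiny plain-Python sketch of the shape handling above: an MpcVariable carries an extra leading dimension of size 2 (presumably one slice per locally held secret share), so a user-facing shape is widened before it reaches the descriptor. The shape values are arbitrary:

shape = [4, 8]
mpc_shape = list(shape)
mpc_shape.insert(0, 2)
print(mpc_shape)  # [2, 4, 8]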
Example No. 28
0
    def visit_FunctionDef(self, node):
        self.function_def.append(node)
        self.return_value_name[node] = None
        self.return_name[node] = []
        self.return_no_value_name[node] = []

        self.pre_analysis = ReturnAnalysisVisitor(node)
        max_return_length = self.pre_analysis.get_func_max_return_length(node)
        while self.pre_analysis.get_func_return_count(node) > 1:
            self.generic_visit(node)
            self.pre_analysis = ReturnAnalysisVisitor(node)

        if max_return_length == 0:
            self.function_def.pop()
            return node

        # Prepend initialization of final return and append final return statement
        value_name = self.return_value_name[node]
        if value_name is not None:
            node.body.append(
                gast.Return(value=gast.Name(id=value_name,
                                            ctx=gast.Load(),
                                            annotation=None,
                                            type_comment=None)))
            init_names = [
                unique_name.generate(RETURN_VALUE_INIT_NAME)
                for i in range(max_return_length)
            ]
            assign_zero_nodes = [
                create_fill_constant_node(iname, 0.0) for iname in init_names
            ]
            if len(init_names) == 1:
                return_value_nodes = gast.Name(id=init_names[0],
                                               ctx=gast.Load(),
                                               annotation=None,
                                               type_comment=None)
            else:
                # We need to initialize the return value as a tuple because
                # control flow requires inputs and outputs to have the same structure.
                return_value_nodes = gast.Tuple(elts=[
                    gast.Name(id=iname,
                              ctx=gast.Load(),
                              annotation=None,
                              type_comment=None) for iname in init_names
                ],
                                                ctx=gast.Load())
            assign_return_value_node = gast.Assign(targets=[
                gast.Name(id=value_name,
                          ctx=gast.Store(),
                          annotation=None,
                          type_comment=None)
            ],
                                                   value=return_value_nodes)
            node.body.insert(0, assign_return_value_node)
            node.body[:0] = assign_zero_nodes

        # Prepend no value placeholders
        for name in self.return_no_value_name[node]:
            assign_no_value_node = create_fill_constant_node(
                name, RETURN_NO_VALUE_MAGIC_NUM)
            node.body.insert(0, assign_no_value_node)

        self.function_def.pop()
        return node
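
Conceptually, the visitor rewrites a function so that every path assigns to a single return variable that is returned once at the end. The hand-written sketch below only illustrates the idea; the real names come from unique_name.generate and the real nodes are built with gast:

# Hypothetical "before":
#     def foo(x):
#         if x > 0:
#             return 1
#         return 0

# Hypothetical "after" (illustrative only):
def foo(x):
    __return_value_0 = 0.0      # initialization prepended to the body
    if x > 0:
        __return_value_0 = 1    # 'return 1' replaced by an assignment
    else:
        __return_value_0 = 0    # 'return 0' replaced by an assignment
    return __return_value_0     # single final return appended

print(foo(3), foo(-3))  # 1 0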
Example No. 29
0
    def modify_forward_desc_for_recompute(self, dist_context):
        """
        If program's foward part has 'dropout' op, this function will insert 
        a seed op before it to guarantee that two dropout op have the same outputs.
        """
        op_types = [op.desc.type() for op in self._ops]
        if "dropout" not in op_types:
            return

        op_idx = 0
        while op_idx < len(self._ops):
            cur_op = self._ops[op_idx]
            if "grad" in cur_op.type:
                break
            if cur_op.type != "dropout":
                op_idx += 1
                continue
            if cur_op.input("Seed") is not None and len(cur_op.input("Seed")):
                op_idx += 1
                continue

            cur_op_dist_attr = dist_context.get_op_dist_attr_for_program(
                cur_op)
            # Insert a seed op to guarantee that the two dropout ops produce the same outputs.
            op_unique_name = unique_name.generate("seed")
            var_unique_name = unique_name.generate_with_ignorable_key(".".join(
                [op_unique_name, 'tmp']))
            seed_var = self._block.create_var(
                name=var_unique_name,
                dtype='int32',
                type=core.VarDesc.VarType.LOD_TENSOR,
                persistable=False,
                stop_gradient=False)

            # set new seed_var's dist_attr
            ref_dims_mapping = [-1]
            ref_process_mesh = cur_op_dist_attr.process_mesh
            seed_var_dist_attr = set_var_dist_attr(dist_context, seed_var,
                                                   ref_dims_mapping,
                                                   ref_process_mesh)

            seed = 0 if cur_op.attr("fix_seed") is False else int(
                cur_op.attr("seed"))
            seed_op = self._block._insert_op_without_sync(
                index=cur_op.idx,
                type="seed",
                inputs={},
                outputs={"Out": seed_var},
                attrs={
                    "seed": seed,
                    "force_cpu": True
                })
            # set new seed op's dist_attr
            naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
                seed_op, ref_process_mesh, ref_dims_mapping, dist_context)

            # modify dropout op's desc
            self._ops.insert(op_idx, seed_op)
            cur_op.desc.set_input("Seed", [var_unique_name])
            cur_op.desc.remove_attr("fix_seed")
            cur_op.desc.remove_attr("seed")
            cur_op_dist_attr.set_input_dist_attr(seed_var.name,
                                                 seed_var_dist_attr)
            self._block._sync_with_cpp()
            op_idx += 2
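
A minimal sketch of how the inserted seed op and its output variable get their names (the exact numeric suffixes depend on the global name counter):

from paddle.fluid import unique_name

op_unique_name = unique_name.generate("seed")
var_unique_name = unique_name.generate_with_ignorable_key(
    ".".join([op_unique_name, 'tmp']))
print(op_unique_name)   # e.g. 'seed_0'
print(var_unique_name)  # e.g. 'seed_0.tmp_0'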
Example No. 30
0
    def _replace_return_in_stmt_list(self, stmt_list, return_node, return_name,
                                     max_return_length, parent_node_of_return):

        assert max_return_length >= 0, "max_return_length must be non-negative"
        i = index_in_list(stmt_list, return_node)
        if i == -1:
            return False

        assign_nodes = []
        # Here we assume that the parent node of the return is gast.If.
        if isinstance(parent_node_of_return, gast.If):
            # Prepend control flow boolean nodes such as '__return@1 = True'
            node_str = "{} = _jst.create_bool_as_type({}, True)".format(
                return_name,
                ast_to_source_code(parent_node_of_return.test).strip())

            assign_true_node = gast.parse(node_str).body[0]
            assign_nodes.append(assign_true_node)

        cur_func_node = self.function_def[-1]
        return_length = get_return_size(return_node)
        if return_length < max_return_length:
            # In this case we should append RETURN_NO_VALUE placeholders.
            #
            # max_return_length must be >= 1 here because return_length is at
            # least 0 and strictly smaller than max_return_length.
            if self.return_value_name[cur_func_node] is None:
                self.return_value_name[cur_func_node] = unique_name.generate(
                    RETURN_VALUE_PREFIX)

            no_value_names = [
                unique_name.generate(RETURN_NO_VALUE_VAR_NAME)
                for j in range(max_return_length - return_length)
            ]
            self.return_no_value_name[cur_func_node].extend(no_value_names)

            # Handle tuple/non-tuple case
            if max_return_length == 1:
                assign_nodes.append(
                    gast.Assign(targets=[
                        gast.Name(id=self.return_value_name[cur_func_node],
                                  ctx=gast.Store(),
                                  annotation=None,
                                  type_comment=None)
                    ],
                                value=gast.Name(id=no_value_names[0],
                                                ctx=gast.Load(),
                                                annotation=None,
                                                type_comment=None)))
            else:
                # max_return_length > 1 which means we should assign tuple
                fill_tuple = [
                    gast.Name(id=n,
                              ctx=gast.Load(),
                              annotation=None,
                              type_comment=None) for n in no_value_names
                ]
                if return_node.value is not None:
                    if isinstance(return_node.value, gast.Tuple):
                        fill_tuple[:0] = return_node.value.elts
                    else:
                        fill_tuple.insert(0, return_node.value)

                assign_nodes.append(
                    gast.Assign(targets=[
                        gast.Name(id=self.return_value_name[cur_func_node],
                                  ctx=gast.Store(),
                                  annotation=None,
                                  type_comment=None)
                    ],
                                value=gast.Tuple(elts=fill_tuple,
                                                 ctx=gast.Load())))
        else:
            # In this case we should NOT append RETURN_NO_VALUE placeholder
            if return_node.value is not None:
                cur_func_node = self.function_def[-1]
                if self.return_value_name[cur_func_node] is None:
                    self.return_value_name[
                        cur_func_node] = unique_name.generate(
                            RETURN_VALUE_PREFIX)

                assign_nodes.append(
                    gast.Assign(targets=[
                        gast.Name(id=self.return_value_name[cur_func_node],
                                  ctx=gast.Store(),
                                  annotation=None,
                                  type_comment=None)
                    ],
                                value=return_node.value))

        stmt_list[i:] = assign_nodes
        return True
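
A plain-Python sketch of the padding logic above (names and lengths are made up): a return that is shorter than the longest return in the function is padded with no-value placeholders so that every path yields the same structure:

max_return_length = 3
returned_exprs = ["x"]  # hypothetical expressions from a short 'return x'
no_value_names = [
    "__no_value_var_{}".format(j)
    for j in range(max_return_length - len(returned_exprs))
]
padded = returned_exprs + no_value_names
print(padded)  # ['x', '__no_value_var_0', '__no_value_var_1']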
Example No. 31
0
    def __init__(self,
                 learning_rate=0.001,
                 lamb_weight_decay=0.01,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-6,
                 parameters=None,
                 grad_clip=None,
                 exclude_from_weight_decay_fn=None,
                 clip_after_allreduce=True,
                 is_grad_scaled_by_nranks=True,
                 alignment=128,
                 use_master_param_norm=True,
                 gradient_accumulation_steps=1,
                 name=None):
        assert not framework._non_static_mode(
        ), "DistributedFusedLamb does not support dygraph mode"
        super(DistributedFusedLamb, self).__init__(learning_rate=learning_rate,
                                                   grad_clip=None,
                                                   name=name)

        self._beta1 = beta1
        self._beta2 = beta2
        self._epsilon = epsilon
        self._weight_decay = lamb_weight_decay if lamb_weight_decay is not None else 0.0
        if grad_clip is not None:
            assert isinstance(
                grad_clip, ClipGradByGlobalNorm
            ), "Only ClipGradByGlobalNorm is supported in DistributedFusedLamb"
            max_global_grad_norm = grad_clip.clip_norm
        else:
            max_global_grad_norm = -1.0
        self._max_global_grad_norm = max_global_grad_norm
        self._alignment = alignment if alignment is not None else -1
        self._clip_after_allreduce = clip_after_allreduce
        self._is_grad_scaled_by_nranks = is_grad_scaled_by_nranks
        self._exclude_from_weight_decay_fn = exclude_from_weight_decay_fn
        self._scale = None
        self._ring_id = 0
        self._use_master_param_norm = use_master_param_norm
        self._gradient_accumulation_steps = gradient_accumulation_steps
        assert self._gradient_accumulation_steps >= 1

        self.helper = LayerHelper('distributed_fused_lamb')
        self._supports_check_nan_inf = True  # a very important flag for AMP

        main_block = self.helper.main_program.global_block()
        self._found_inf = main_block.create_var(
            name=unique_name.generate('found_inf'),
            shape=[1],
            dtype=core.VarDesc.VarType.BOOL)
        self._step = None

        if self._gradient_accumulation_steps > 1:
            self._stop_update = main_block.create_var(
                name=unique_name.generate('stop_update'),
                shape=[1],
                dtype=core.VarDesc.VarType.BOOL)
        else:
            self._stop_update = None

        self._param_to_master_param = {}