Example #1
    def _build_trainer_programs(self):

        add_lr_decay_table_pass = new_pass("add_lr_decay_table_pass",
                                           self.attrs)
        add_lr_decay_table_pass.apply([], [], self.pass_ctx)

        distributed_ops_pass = new_pass("distributed_ops_pass", self.attrs)
        distributed_ops_pass.apply([self.cloned_main], [None], self.pass_ctx)

        fake_init_ops_pass = new_pass("fake_init_ops_pass", self.attrs)
        fake_init_ops_pass.apply([None], [self.cloned_startup], self.pass_ctx)

        ps_gpu_pass = new_pass("ps_gpu_pass", self.attrs)
        ps_gpu_pass.apply([self.cloned_main], [None], self.pass_ctx)

        ps_transpile_pass = new_pass("ps_transpile_pass", self.attrs)
        ps_transpile_pass.apply([self.cloned_main], [self.cloned_startup],
                                self.pass_ctx)

        self.attrs['origin_main_program'] = self.cloned_main
        self.attrs['origin_startup_program'] = self.cloned_startup

        if self.launch_barrier and self.launch_barrier_flag:
            wait_server_ready(self.server_endpoints)

        return
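The builders in these examples drive each pass by hand against a shared pass context. Below is a minimal, self-contained sketch of that raw new_pass/PassContext pattern, assuming a PaddlePaddle static-graph build; the toy network and the chosen pass name are illustrative, not part of the example above:

    import paddle
    from paddle.distributed.passes import new_pass, PassContext

    paddle.enable_static()

    # Build a small static-graph program pair to run a pass over.
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    with paddle.static.program_guard(main_prog, startup_prog):
        x = paddle.static.data(name="x", shape=[-1, 8], dtype="float32")
        out = paddle.static.nn.fc(x, size=4, activation="relu")

    # A pass is created by name (optionally with an attrs dict) and applied
    # to lists of main/startup programs plus a context that collects results.
    pass_ctx = PassContext()
    fuse_pass = new_pass("fuse_elewise_add_act")
    fuse_pass.apply([main_prog], [startup_prog], pass_ctx)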
Example #2
    def apply_passes(self, main_prog, startup_prog):
        pass_manager = PassManager([
            new_pass("build_cinn"),
            new_pass("fuse_elewise_add_act"),
        ])
        pass_manager.apply([main_prog], [startup_prog])
        print(pass_manager.names)
Example #3
    def _apply_pre_optimization(self, main_program, startup_program, loss,
                                optimizer, params_grads):
        if self._strategy is None:
            return
        # apply amp pass
        if self._strategy.amp:
            config = copy.deepcopy(self._strategy.amp_configs)
            config["dist_context"] = self._dist_context
            config["params_grads"] = params_grads
            config["loss"] = loss
            config["input_data"] = self._dist_context.serial_feed_vars["inputs"] \
                + self._dist_context.serial_feed_vars["labels"]
            if config["use_pure_fp16"]:
                config["base_opt"] = optimizer
                auto_parallel_fp16_pass = new_pass("auto_parallel_fp16",
                                                   config)
                auto_parallel_fp16_pass.apply([main_program],
                                              [startup_program],
                                              self._pass_context)
            else:
                auto_parallel_amp_pass = new_pass("auto_parallel_amp", config)
                auto_parallel_amp_pass.apply([main_program], [startup_program],
                                             self._pass_context)

        # apply recompute pass
        if self._strategy.recompute:
            config = copy.deepcopy(self._strategy.recompute_configs)
            config["dist_context"] = self._dist_context
            config["no_grad_set"] = None
            config["loss"] = loss
            auto_parallel_recompute_pass = new_pass("auto_parallel_recompute",
                                                    config)
            auto_parallel_recompute_pass.apply([main_program],
                                               [startup_program],
                                               self._pass_context)
Example #4
    def _apply_pre_optimization_passes(self, main_program, startup_program,
                                       loss, params_grads, no_grad_set):
        # apply amp pass
        if self._dist_strategy.amp:
            config = copy.deepcopy(self._dist_strategy.amp_configs)
            config["dist_context"] = self._dist_context
            config["params_grads"] = params_grads
            config["loss"] = loss
            if config["use_pure_fp16"]:
                config["base_opt"] = self._optimizer
                auto_parallel_fp16_pass = new_pass("auto_parallel_fp16",
                                                   config)
                auto_parallel_fp16_pass.apply([main_program],
                                              [startup_program],
                                              self._pass_context)
            else:
                auto_parallel_amp_pass = new_pass("auto_parallel_amp", config)
                auto_parallel_amp_pass.apply([main_program], [startup_program],
                                             self._pass_context)

        # apply recompute pass
        if self._dist_strategy.recompute:
            config = copy.deepcopy(self._dist_strategy.recompute_configs)
            config["dist_context"] = self._dist_context
            config["no_grad_set"] = copy.deepcopy(no_grad_set)
            config["loss"] = loss
            auto_parallel_recompute_pass = new_pass("auto_parallel_recompute",
                                                    config)
            auto_parallel_recompute_pass.apply([main_program],
                                               [startup_program],
                                               self._pass_context)
Example #5
    def apply_passes(self, main_prog, startup_prog):
        pass_manager = PassManager([
            new_pass("fuse_elewise_add_act"),
            new_pass("fuse_all_reduce", {"max_memory_size": 1024 * 1024})
        ])
        pass_manager.apply([main_prog], [startup_prog])
        print(pass_manager.names)
Example #6
    def apply_passes(self, main_prog, startup_prog):
        pass_manager = PassManager([
            new_pass("build_cinn"),
            new_pass("fuse_elewise_add_act"),
        ])
        pass_manager.apply([main_prog], [startup_prog])
        op_types = [op.type for op in main_prog.global_block().ops]
        self.assertTrue('cinn_launch' in op_types)
Example #7
    def apply_passes(self, main_prog, startup_prog):
        self._config["params_grads"] = self._params_grads
        pass_context = PassContext()
        auto_parallel_gradient_merge_pass = new_pass(
            "auto_parallel_gradient_merge_pass", self._config)
        auto_parallel_gradient_merge_pass.apply([main_prog], [startup_prog],
                                                pass_context)
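Note the two context styles: the parameter-server builders reuse a long-lived self.pass_ctx, which later code reads results back out of (see Example #20, where part_a_main_program is fetched from the context), while this test builds a throwaway PassContext per call because nothing downstream consumes it.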
Example #8
    def _build_trainer_programs(self):
        append_send_ops_pass = new_pass("append_send_ops_pass", self.attrs)
        append_send_ops_pass.apply([self.cloned_main], [None], self.pass_ctx)

        self.attrs['origin_main_program'] = self.cloned_main

        if self.launch_barrier and self.launch_barrier_flag:
            wait_server_ready(self.server_endpoints)
Example #9
    def apply_passes(self, main_prog, startup_prog):
        pass_manager = PassManager([new_pass("fuse_bn_act")])
        pass_manager.apply([main_prog], [startup_prog])
        print(pass_manager.names)

        op_type = []
        for op in main_prog.global_block().ops:
            op_type.append(op.type)
        self.assertTrue("fused_batch_norm_act" in op_type)
        self.assertTrue("fused_batch_norm_act_grad" in op_type)
Example #10
    def apply_passes(self, main_prog, startup_prog):
        pass_manager = PassManager([new_pass("fuse_relu_depthwise_conv")])
        pass_manager.apply([main_prog], [startup_prog])
        print(pass_manager.names)

        op_type = []
        for op in main_prog.global_block().ops:
            if op.type == "depthwise_conv2d":
                self.assertTrue(op.desc.attr("fuse_relu_before_depthwise_conv"))
            op_type.append(op.type)
        self.assertTrue("depthwise_conv2d" in op_type)
Example #11
    def apply_passes(self, main_prog, startup_prog):
        pass_manager = PassManager(
            [new_pass("inplace_addto_op", {"use_cuda": True})])
        pass_manager.apply([main_prog], [startup_prog])
        print(pass_manager.names)

        conv2d_grad_attr = []
        for op in main_prog.global_block().ops:
            if op.type == "conv2d_grad":
                conv2d_grad_attr.append(op.desc.attr("use_addto"))
        self.assertTrue(True in conv2d_grad_attr)
Example #12
    def _apply_post_optimization_passes(self, main_program, startup_program,
                                        rank, params_grads):

        if self._dist_strategy.sharding:
            config = copy.deepcopy(self._dist_strategy.sharding_configs)
            config["dist_context"] = self._dist_context
            config["params_grads"] = params_grads
            config["global_rank"] = rank
            auto_parallel_sharding_pass = new_pass("auto_parallel_sharding",
                                                   config)
            auto_parallel_sharding_pass.apply(
                [main_program], [startup_program], self._pass_context)

        if self._dist_strategy.gradient_merge:
            config = copy.deepcopy(self._dist_strategy.gradient_merge_configs)
            config["dist_context"] = self._dist_context
            config["params_grads"] = params_grads
            auto_parallel_gradient_merge_pass = new_pass(
                "auto_parallel_gradient_merge_pass", config)
            auto_parallel_gradient_merge_pass.apply(
                [main_program], [startup_program], self._pass_context)
Example #13
    def _build_trainer_programs(self):
        # print("build trainer program entry")
        # print("before ps program builder program:", self.cloned_main)
        add_lr_decay_table_pass = new_pass("add_lr_decay_table_pass",
                                           self.attrs)
        add_lr_decay_table_pass.apply([], [], self.pass_ctx)

        # print("before distributed op pass")
        distributed_ops_pass = new_pass("distributed_ops_pass", self.attrs)
        distributed_ops_pass.apply([self.cloned_main], [None], self.pass_ctx)

        delete_optimizer_pass = new_pass("delete_optimizer_pass", self.attrs)
        delete_optimizer_pass.apply([self.cloned_main], [None], self.pass_ctx)

        append_send_ops_pass = new_pass("append_send_ops_pass", self.attrs)
        append_send_ops_pass.apply([self.cloned_main], [None], self.pass_ctx)

        delete_extra_optimizer_pass = new_pass("delete_extra_optimizer_pass",
                                               self.attrs)
        delete_extra_optimizer_pass.apply([self.attrs['origin_main_program']],
                                          [self.cloned_startup], self.pass_ctx)

        fake_init_ops_pass = new_pass("fake_init_ops_pass", self.attrs)
        fake_init_ops_pass.apply([None], [self.cloned_startup], self.pass_ctx)

        self.attrs['origin_main_program'] = self.cloned_main
        self.attrs['origin_startup_program'] = self.cloned_startup
        # print("after ps program builder program:", self.cloned_main)

        if self.launch_barrier and self.launch_barrier_flag:
            wait_server_ready(self.server_endpoints)

        return
Example #14
    def apply_passes(self, main_prog, startup_prog):
        pass_manager = PassManager([new_pass("fuse_optimizer")])
        pass_manager.apply([main_prog], [startup_prog])
        print(pass_manager.names)

        op_type = []
        for op in main_prog.global_block().ops:
            op_type.append(op.type)
            if op.type == "adam":
                self.assertTrue("@FUSEDVAR@_adam_Param_batch_norm2d_0.b_0" in
                                op.input("Param"))
                self.assertTrue("@FUSEDVAR@_adam_Grad_batch_norm2d_0.b_0@GRAD"
                                in op.input("Grad"))
        self.assertTrue("coalesce_tensor" in op_type)
Example #15
    def _build_pserver_programs(self):
        is_sgd_adam = False
        ops = get_optimize_ops(self.attrs['origin_main_program'])
        if len(ops) == 0:
            return
        add_lr_decay_table_pass = new_pass('add_lr_decay_table_pass',
                                           self.attrs)
        add_lr_decay_table_pass.apply([], [], self.pass_ctx)
        for op in ops:
            if op.type in ["sgd", "adam"]:
                is_sgd_adam = True
                break
        if is_sgd_adam:
            return
Example #16
    def _build_programs(self):
        if self.attrs['is_worker'] or self.attrs['is_heter_worker']:
            self._build_trainer_programs()
            ps_set_heter_pipeline_opt_pass = new_pass(
                "set_heter_pipeline_opt_pass", self.attrs)
            ps_set_heter_pipeline_opt_pass.apply([self.cloned_main],
                                                 [self.cloned_startup],
                                                 self.pass_ctx)

        elif self.attrs['is_server']:
            self._build_pserver_programs()
            self.loss.block.program = self.attrs['_main_server']
            fluid.framework.switch_startup_program(
                self.attrs['_startup_server'])
Example #17
    def _build_trainer_programs(self):
        add_lr_decay_table_pass = new_pass("add_lr_decay_table_pass",
                                           self.attrs)
        add_lr_decay_table_pass.apply([], [], self.pass_ctx)

        distributed_ops_pass = new_pass("distributed_ops_pass", self.attrs)
        distributed_ops_pass.apply([self.cloned_main], [None], self.pass_ctx)

        delete_optimizer_pass = new_pass("delete_optimizer_pass", self.attrs)
        delete_optimizer_pass.apply([self.cloned_main], [None], self.pass_ctx)

        append_send_ops_pass = new_pass("append_send_ops_pass", self.attrs)
        append_send_ops_pass.apply([self.cloned_main], [None], self.pass_ctx)

        delete_extra_optimizer_pass = new_pass("delete_extra_optimizer_pass",
                                               self.attrs)
        delete_extra_optimizer_pass.apply([self.attrs['origin_main_program']],
                                          [self.cloned_startup], self.pass_ctx)

        fake_init_ops_pass = new_pass("fake_init_ops_pass", self.attrs)
        fake_init_ops_pass.apply([None], [self.cloned_startup], self.pass_ctx)

        if self.is_heter_worker:
            split_heter_worker_ops_pass = new_pass(
                "split_heter_worker_ops_pass", self.attrs)
            split_heter_worker_ops_pass.apply([self.cloned_main], [None],
                                              self.pass_ctx)
        else:
            split_trainer_ops_pass = new_pass("split_trainer_ops_pass",
                                              self.attrs)
            split_trainer_ops_pass.apply([self.cloned_main], [None],
                                         self.pass_ctx)

        set_heter_pipeline_opt_pass = new_pass('set_heter_pipeline_opt_pass',
                                               self.attrs)
        set_heter_pipeline_opt_pass.apply([self.cloned_main],
                                          [self.cloned_startup], self.pass_ctx)

        if self.launch_barrier and self.launch_barrier_flag:
            wait_server_ready(self.server_endpoints)

        return
Example #18
    def pass_config(self):
        return [
            new_pass("fuse_elewise_add_act"),
            new_pass("fuse_all_reduce", {"max_memory_size": 1024 * 1024}),
        ]
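Example #18 only assembles the pass list; a hedged sketch of how such a pass_config() result is typically consumed follows. The PassManager call mirrors the other examples, the program setup is illustrative, and it assumes these fuse passes simply no-op on a program with nothing to fuse:

    import paddle
    from paddle.distributed.passes import new_pass, PassManager

    paddle.enable_static()

    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    with paddle.static.program_guard(main_prog, startup_prog):
        x = paddle.static.data(name="x", shape=[-1, 8], dtype="float32")
        out = paddle.static.nn.fc(x, size=4)

    # The attrs dict (here max_memory_size) is forwarded to the pass.
    passes = [
        new_pass("fuse_elewise_add_act"),
        new_pass("fuse_all_reduce", {"max_memory_size": 1024 * 1024}),
    ]
    pass_manager = PassManager(passes)
    pass_manager.apply([main_prog], [startup_prog])
    print(pass_manager.names)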
Example #19
    def _build_pserver_programs(self):
        add_listen_and_serv_pass = new_pass('add_listen_and_serv_pass',
                                            self.attrs)
        add_listen_and_serv_pass.apply([self.attrs['_main_server']], [None],
                                       self.pass_ctx)
        return
Example #20
    def _build_trainer_programs(self):
        _main_file = ps_log_root_dir + '0_fl_worker_main_program.prototxt'
        #debug_program(_main_file, self.cloned_main)

        distributed_ops_pass = new_pass("distributed_ops_pass", self.attrs)
        distributed_ops_pass.apply([self.cloned_main], [None], self.pass_ctx)

        _main_file = ps_log_root_dir + '1_fl_worker_main_program.prototxt'
        #debug_program(_main_file, self.cloned_main)

        delete_optimizer_pass = new_pass("delete_optimizer_pass", self.attrs)
        delete_optimizer_pass.apply([self.cloned_main], [None], self.pass_ctx)

        _main_file = ps_log_root_dir + '2_fl_worker_main_program.prototxt'
        #debug_program(_main_file, self.cloned_main)

        append_send_ops_pass = new_pass("append_send_ops_pass", self.attrs)
        append_send_ops_pass.apply([self.cloned_main], [None], self.pass_ctx)

        _main_file = ps_log_root_dir + '3_fl_worker_main_program.prototxt'
        #debug_program(_main_file, self.cloned_main)

        delete_extra_optimizer_pass = new_pass("delete_extra_optimizer_pass",
                                               self.attrs)
        delete_extra_optimizer_pass.apply([self.attrs['origin_main_program']],
                                          [self.cloned_startup], self.pass_ctx)

        _main_file = ps_log_root_dir + '4_fl_worker_main_program.prototxt'
        #debug_program(_main_file, self.cloned_main)

        fake_init_ops_pass = new_pass("fake_init_ops_pass", self.attrs)
        fake_init_ops_pass.apply([None], [self.cloned_startup], self.pass_ctx)

        _main_file = ps_log_root_dir + '5_fl_worker_main_program.prototxt'
        #debug_program(_main_file, self.cloned_main)

        split_trainer_ops_pass = new_pass("split_fl_ops_pass", self.attrs)
        split_trainer_ops_pass.apply([self.cloned_main], [None], self.pass_ctx)

        if not self.is_heter_worker:
            self.part_a_program = self.pass_ctx._attrs['part_a_main_program']
            self.cloned_main = self.part_a_program
            _main_file = ps_log_root_dir + '8_fl_A_main_program.prototxt'
            debug_program(_main_file, self.cloned_main)
        else:
            self.part_b_program = self.pass_ctx._attrs['part_b_main_program']
            self.cloned_main = self.part_b_program
            _main_file = ps_log_root_dir + '8_fl_B_main_program.prototxt'
            debug_program(_main_file, self.cloned_main)

        set_heter_pipeline_opt_pass = new_pass('set_heter_pipeline_opt_pass',
                                               self.attrs)
        set_heter_pipeline_opt_pass.apply([self.cloned_main],
                                          [self.cloned_startup], self.pass_ctx)

        self.attrs['origin_startup_program'] = self.cloned_startup
        self.attrs['origin_main_program'] = self.cloned_main

        if not self.is_heter_worker:
            _main_file = ps_log_root_dir + 'final_fl_A_main_program.prototxt'
            debug_program(
                _main_file, self.attrs['origin_main_program'].
                _heter_pipeline_opt['section_program'])
        else:
            _main_file = ps_log_root_dir + 'final_fl_B_main_program.prototxt'
            debug_program(
                _main_file, self.attrs['origin_main_program'].
                _heter_pipeline_opt['section_program'])

        return