def run_pserver(pserver_id):
    """Start a listen_and_serv parameter server that owns a 'table' tensor.

    The (5, 8) table is filled deterministically from ``pserver_id`` so the
    client side can tell which server produced which values.

    Args:
        pserver_id (int): index of this parameter server; seeds the table.
    """
    remove_ps_flag(os.getpid())
    scope = fluid.core.Scope()
    program = Program()
    with fluid.scope_guard(scope):
        with program_guard(program, startup_program=Program()):
            # create table parameter in scope
            place = fluid.CPUPlace()
            # create and initialize Param Variable
            param = scope.var('table').get_tensor()
            param_array = np.ones((5, 8)).astype("float32")
            for i in range(len(param_array)):
                # every element of row i becomes i + pserver_id * 10 + 1
                param_array[i] *= param_array[i] * i + pserver_id * 10 + 1
            param.set(param_array, place)
            optimize_block = program._create_block(program.global_block().idx)
            # endpoint port 0 lets the OS pick a free port
            program.global_block().append_op(
                type="listen_and_serv",
                inputs={'X': []},
                outputs={},
                attrs={
                    "optimize_blocks": [optimize_block],
                    "endpoint": '127.0.0.1:0',
                    "Fanin": 1,
                    "distributed_mode": DistributedMode.SYNC,
                    "grad_to_block_id": []
                })
            exe = fluid.Executor(place)
            # NOTE(review): listen_and_serv presumably blocks here serving
            # RPC requests until stopped externally — confirm with callers.
            exe.run(program)
def run_pserver(pserver_id, use_cuda, sync_mode):
    """Start a listen_and_serv parameter server that owns a 'table' tensor.

    Args:
        pserver_id (int): index of this parameter server; seeds the table.
        use_cuda (bool): run on CUDAPlace(0) instead of CPUPlace.
        sync_mode (bool): whether the server runs in synchronous mode.
    """
    scope = fluid.core.Scope()
    program = Program()
    with fluid.scope_guard(scope):
        with program_guard(program, startup_program=Program()):
            # create table parameter in scope
            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            # create and initialize Param Variable
            param = scope.var('table').get_tensor()
            param_array = np.ones((10, 8)).astype("float32")
            for i in range(len(param_array)):
                # every element of row i becomes i + pserver_id * 10
                param_array[i] *= param_array[i] * i + pserver_id * 10
            param.set(param_array, place)
            optimize_block = program._create_block(program.global_block().idx)
            program.global_block().append_op(
                type="listen_and_serv",
                inputs={'X': []},
                outputs={},
                attrs={
                    "optimize_blocks": [optimize_block],
                    # port 0 lets the OS pick a free port
                    "endpoint": '127.0.0.1:0',
                    "Fanin": 1,
                    # BUG FIX: honor the sync_mode argument; it was
                    # hard-coded to True, silently ignoring the parameter.
                    "sync_mode": sync_mode,
                    "grad_to_block_id": []
                })
            exe = fluid.Executor(place)
            exe.run(program)
class RNNMemoryHelperOpTest(unittest.TestCase):
    """Checks that the rnn_memory_helper op forwards X to Out unchanged."""

    def setUp(self):
        # Build a one-op program: Out = rnn_memory_helper(X), X of shape (2, 3).
        self.program = Program()
        self.place = core.CPUPlace()
        self.X = self.program.global_block().create_var(name='X',
                                                        shape=[2, 3],
                                                        dtype='float32')
        self.Out = self.program.global_block().create_var(name='Out',
                                                          shape=[2, 3],
                                                          dtype='float32')
        self.program.global_block().append_op(type='rnn_memory_helper',
                                              inputs={"X": self.X},
                                              outputs={"Out": self.Out},
                                              attrs={})

    def test_forward(self):
        # Forward pass should behave as identity on the fed array.
        x_np = np.random.normal(size=(2, 3)).astype("float32")
        self.feed_map = {'X': x_np}
        self.fetch_list = [self.Out]
        exe = Executor(self.place)
        out = exe.run(self.program,
                      feed=self.feed_map,
                      fetch_list=self.fetch_list)
        self.assertTrue(np.allclose(out[0], x_np, rtol=1e-5))
def load_splited_program(save_path):
    """Load a startup/main program pair previously split-saved to save_path.

    Reads the two serialized programs plus a JSON "model_info" sidecar,
    restores every listed variable as a Parameter in both programs, and
    re-applies stop_gradient flags recorded in the sidecar.

    Args:
        save_path (str): directory containing "startup_program",
            "main_program" and "model_info".

    Returns:
        (startup_program, main_program, model_info)
    """
    startup_program, main_program = None, None
    with open(os.path.join(save_path, "startup_program"), "rb") as fin:
        startup_program = Program().parse_from_string(fin.read())
    with open(os.path.join(save_path, 'main_program'), "rb") as fin:
        main_program = Program().parse_from_string(fin.read())
    with open(os.path.join(save_path, "model_info"), "r") as fin:
        model_info = json.loads(fin.read())
    # params
    for item in model_info["params"]:
        # Re-class plain deserialized variables as Parameter so optimizers
        # treat them as weights again.
        main_para = main_program.global_block().var(item['name'])
        main_para.__class__ = Parameter
        main_para.regularizer = None
        main_para.optimize_attr = {'learning_rate': 1.0}
        main_para.trainable = item['trainable']
        main_para.is_distributed = False
        startup_para = startup_program.global_block().var(item['name'])
        startup_para.__class__ = Parameter
        startup_para.regularizer = None
        startup_para.optimize_attr = {'learning_rate': 1.0}
        startup_para.trainable = item['trainable']
        startup_para.is_distributed = False
    # stop_gradient
    for stop_name in model_info["stop_gradient_vars"]:
        stop_var = main_program.global_block().var(stop_name)
        stop_var.stop_gradient = True
    return startup_program, main_program, model_info
def _calc_output(self, place):
    """Build a single-op program from self.op_type/inputs/outputs/attrs,
    run it on `place`, and return (outputs, fetch_list) as LoDTensors.
    """
    op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)
    program = Program()
    block = program.global_block()
    inputs = append_input_output(block, op_proto, self.inputs, True)
    outputs = append_input_output(block, op_proto, self.outputs, False)
    op = block.append_op(
        type=self.op_type,
        inputs=inputs,
        outputs=outputs,
        attrs=self.attrs if hasattr(self, "attrs") else dict())
    # infer variable type and infer shape in compile-time
    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)
    fetch_list = []
    # FIX: .items() instead of the Python-2-only .iteritems(), so this
    # helper also runs under Python 3 (behavior identical on Python 2).
    for var_name, var in outputs.items():
        # only fetch the outputs the test actually configured
        if var_name in self.outputs:
            if isinstance(var, list):
                for v in var:
                    fetch_list.append(v)
            else:
                fetch_list.append(var)
    feed_map = self.feed_var(inputs, place)
    exe = Executor(place)
    outs = exe.run(program,
                   feed=feed_map,
                   fetch_list=fetch_list,
                   return_numpy=False)
    return outs, fetch_list
def _get_gradient(self, input_to_check, place, output_names, no_grad_set,
                  parallel=False):
    """Append the tested op plus a loss, run backward, and return the
    gradients of `input_to_check` as numpy arrays.

    Args:
        input_to_check: parameter names whose gradients are fetched.
        place: fluid place to run on.
        output_names: output names the loss is built from.
        no_grad_set: variables excluded from backward.
        parallel (bool): use ParallelExecutor instead of Executor.
    """
    prog = Program()
    block = prog.global_block()
    self._append_ops(block)
    loss = append_loss_ops(block, output_names)
    param_grad_list = append_backward(loss=loss,
                                      parameter_list=input_to_check,
                                      no_grad_set=no_grad_set)
    inputs = self._get_inputs(block)
    feed_dict = self.feed_var(inputs, place)
    fetch_list = [g for p, g in param_grad_list]
    if parallel:
        # BUG FIX: isinstance() requires the class as its second argument;
        # the original `isinstance(place, fluid.CUDAPlace(0))` passed an
        # *instance* and raised TypeError whenever parallel=True.
        use_cuda = isinstance(place, fluid.CUDAPlace)
        executor = fluid.ParallelExecutor(use_cuda=use_cuda,
                                          loss_name=loss.name,
                                          main_program=prog)
    else:
        executor = Executor(place)
    return list(
        map(np.array,
            executor.run(prog, feed_dict, fetch_list, return_numpy=False)))
def save_persistable_nodes(executor, dirname, graph):
    """
    Save persistable nodes to the given directory by the executor.

    Args:
        executor(Executor): The executor to run for saving node values.
        dirname(str): The directory path.
        graph(IrGraph): All the required persistable nodes in the graph will
            be saved.
    """
    # Deduplicate by name: the graph may contain multiple nodes that refer
    # to the same persistable variable.
    persistable_node_names = set()
    persistable_nodes = []
    all_persistable_nodes = graph.all_persistable_nodes()
    for node in all_persistable_nodes:
        name = cpt.to_text(node.name())
        if name not in persistable_node_names:
            persistable_node_names.add(name)
            persistable_nodes.append(node)
    program = Program()
    var_list = []
    for node in persistable_nodes:
        var_desc = node.var()
        # RAW / READER variables carry no tensor data and cannot be saved.
        if var_desc.type() == core.VarDesc.VarType.RAW or \
                var_desc.type() == core.VarDesc.VarType.READER:
            continue
        # Mirror the node's descriptor into a fresh program so save_vars
        # can find it.
        var = program.global_block().create_var(
            name=var_desc.name(),
            shape=var_desc.shape(),
            dtype=var_desc.dtype(),
            type=var_desc.type(),
            lod_level=var_desc.lod_level(),
            persistable=var_desc.persistable())
        var_list.append(var)
    fluid.io.save_vars(executor=executor, dirname=dirname, vars=var_list)
def run(self, input, output, attrs):
    """Build a single-op program for self.op_name, execute it, and return a
    dict mapping each fetched output variable name to its numpy value.

    Args:
        input (dict): op input config (name -> numpy data).
        output (dict): op output config.
        attrs (dict): op attributes.
    """
    program = Program()
    block = program.global_block()
    op_proto = self.op_proto
    inputs = self.__append_input_output(block, op_proto, input, True)
    outputs = self.__append_input_output(block, op_proto, output, False)
    op = block.append_op(type=self.op_name,
                         inputs=inputs,
                         outputs=outputs,
                         attrs=attrs)
    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)
    fetch_list = []
    # FIX: .items() replaces the Python-2-only .iteritems(). Membership is
    # checked against the caller-supplied `output` dict — the original
    # tested `var_name in outputs`, which is always true since var_name
    # comes from `outputs` itself (cf. the sibling _calc_output, which
    # filters by the configured outputs).
    for var_name, var in outputs.items():
        if var_name in output:
            if isinstance(var, list):
                for v in var:
                    fetch_list.append(v)
            else:
                fetch_list.append(var)
    feed_map = self.__feed_var(inputs, input)
    exe = Executor(self.place)
    result = exe.run(program,
                     feed=feed_map,
                     fetch_list=fetch_list,
                     return_numpy=False)
    actual_dict = {}  # renamed from the typo 'atcual_dic'
    for i, obj in enumerate(fetch_list):
        actual_dict[obj.name] = np.array(result[i])
    return actual_dict
class TestShrinkRNNMemoryBase(unittest.TestCase):
    """Base fixture: chains three shrink_memory ops over a LoD rank table
    and appends backward on mean(mem3) so subclasses can check x@GRAD.
    """

    def setUp(self):
        self.main_program = Program()
        switch_main_program(self.main_program)
        x = layers.data('x', shape=[100], dtype='float32')
        x.stop_gradient = False
        rank_table_tensor = layers.data(
            'rank_table_tensor', shape=[1], dtype='float32', lod_level=1)
        table = layers.lod_rank_table(x=rank_table_tensor)
        i = layers.zeros(dtype='int64', shape=[1])
        self.mem1 = layers.shrink_memory(x=x, i=i, table=table)
        i = layers.increment(x=i)
        i.stop_gradient = True
        self.mem2 = layers.shrink_memory(x=self.mem1, i=i, table=table)
        i = layers.increment(x=i)
        i.stop_gradient = True
        self.mem3 = layers.shrink_memory(x=self.mem2, i=i, table=table)
        mem3_mean = layers.mean(self.mem3)
        append_backward(loss=mem3_mean)
        self.x_grad = self.main_program.global_block().var('x@GRAD')

    def sum_lodtensor(self, tensor):
        """Return the sum of every float element of a LoDTensor."""
        sum_res = 0.0
        # FIX: range() replaces the Python-2-only xrange(), and np.prod
        # replaces the deprecated np.product alias (removed in NumPy 2.0).
        for i in range(np.prod(tensor.get_dims())):
            sum_res += tensor.get_float_element(i)
        return sum_res
def test_grad(self):
    """Gradient must survive a lod_tensor_to_array -> array_to_lod_tensor
    round trip: mean() backward spreads 1/N per element, so x@GRAD sums
    to ~1 over the whole input."""
    place = core.CPUPlace()
    program = Program()
    with program_guard(program):
        x = layers.data(name='x',
                        shape=[1],
                        dtype='float32',
                        stop_gradient=False)
        table = layers.lod_rank_table(x, level=0)
        array = layers.lod_tensor_to_array(x, table)
        result = layers.array_to_lod_tensor(array, table)
        mean = layers.mean(result)
        append_backward(mean)
        tensor = core.LoDTensor()
        tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
        # three sequences of lengths 3, 6 and 1
        tensor.set_recursive_sequence_lengths([[3, 6, 1]])
        g_vars = program.global_block().var(x.name + "@GRAD")
        exe = Executor(place)
        g_out = [
            numpy.array(item).sum()
            for item in exe.run(program,
                                feed={'x': tensor},
                                fetch_list=[g_vars],
                                return_numpy=False)
        ]
        g_out_sum = numpy.array(g_out).sum()
        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
def test_grad(self):
    """Same round-trip gradient check as above, but using the legacy
    offset-based set_lod API ([[0, 3, 9, 10]] = lengths 3, 6, 1)."""
    place = core.CPUPlace()
    program = Program()
    with program_guard(program):
        x = layers.data(
            name='x', shape=[1], dtype='float32', stop_gradient=False)
        table = layers.lod_rank_table(x, level=0)
        array = layers.lod_tensor_to_array(x, table)
        result = layers.array_to_lod_tensor(array, table)
        mean = layers.mean(result)
        append_backward(mean)
        tensor = core.LoDTensor()
        tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
        # legacy LoD offsets, equivalent to sequence lengths [3, 6, 1]
        tensor.set_lod([[0, 3, 9, 10]])
        g_vars = program.global_block().var(x.name + "@GRAD")
        exe = Executor(place)
        g_out = [
            numpy.array(item).sum()
            for item in exe.run(program,
                                feed={'x': tensor},
                                fetch_list=[g_vars],
                                return_numpy=False)
        ]
        g_out_sum = numpy.array(g_out).sum()
        # mean() distributes gradient 1/N per element, totalling 1
        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
def load_vars_by_dict(executor, name_var_dict, main_program=None):
    """Append one 'load' op per (file path, variable) pair and run them.

    Args:
        executor: executor that runs the generated load program.
        name_var_dict (dict): maps a file path to the Variable to fill.
        main_program: validated to be a Program (defaults to the global
            default main program).
    """
    from paddle.fluid.framework import Program, Variable
    from paddle.fluid import core

    loader_program = Program()
    loader_block = loader_program.global_block()
    if main_program is None:
        main_program = fluid.default_main_program()
    if not isinstance(main_program, Program):
        raise TypeError("program should be as Program type or None")
    for path, var in name_var_dict.items():
        assert isinstance(var, Variable)
        # RAW variables hold no tensor data, so there is nothing to load
        if var.type == core.VarDesc.VarType.RAW:
            continue
        loader_block.append_op(type='load',
                               inputs={},
                               outputs={'Out': [var]},
                               attrs={'file_path': path})
    executor.run(loader_program)
def get_parameter_value(para, executor):
    """
    Get the LoDTensor value of the given parameter.

    Args:
        para(Parameter): The parameter to get value from.
        executor(Executor): The executor to run for retrieving the value.

    Returns:
        numpy.array: The given parameter's values.

    Raises:
        AssertionError: If the `para` is not an instance of Parameter.

    Examples:
        .. code-block:: python

            exe = fluid.Executor(fluid.CPUPlace())
            param = fluid.default_main_program().global_block().var('fc.w')
            p = fluid.io.get_parameter_value(param, exe)
    """
    assert is_parameter(para)
    # Fetching a clone of the parameter from an otherwise empty program
    # pulls its current value out of the executor's scope.
    get_program = Program()
    block = get_program.global_block()
    new_var = _clone_var_in_block_(block, para)
    return executor.run(get_program, feed={}, fetch_list=[new_var])[0]
def test_program_clone(self):
    """Build a one-op (mul) program and print it next to its clone."""
    prog = Program()
    block = prog.global_block()
    x = block.create_var(name='X', shape=[1000, 784], dtype='float32')
    y = block.create_var(name='Y', shape=[784, 100], dtype='float32')
    out = block.create_var(name='Out', dtype='float32')
    block.append_op(type="mul",
                    inputs={'X': [x], 'Y': [y]},
                    outputs={'Out': [out]})
    # FIXME(yuyang18): We manual compare the output string, since the order
    # of variable could be changed.
    print(prog)
    print(prog.clone())
class RNNMemoryHelperGradOpWithoutInputTest(unittest.TestCase):
    """rnn_memory_helper_grad with Out@GRAD absent from the real program:
    X@GRAD must come back as all zeros."""

    def setUp(self):
        self.program = Program()
        # Out@GRAD is created only in this throwaway program, so the real
        # program's grad op sees no incoming gradient for Out.
        self.fake_program = Program()
        self.place = core.CPUPlace()
        self.input_names = ['X', 'Out']
        self.input_vars = {
            name: self.program.global_block().create_var(name=name,
                                                         shape=[2, 3],
                                                         dtype='float32')
            for name in self.input_names
        }
        self.input_vars["Out@GRAD"] = \
            self.fake_program.global_block().create_var(
                name="Out@GRAD", shape=[2, 3], dtype='float32')
        self.output_names = ['X@GRAD']
        self.output_vars = {
            name: self.program.global_block().create_var(name=name,
                                                         shape=[2, 3],
                                                         dtype='float32')
            for name in self.output_names
        }
        self.program.global_block().append_op(type='rnn_memory_helper_grad',
                                              inputs=self.input_vars,
                                              outputs=self.output_vars,
                                              attrs={})

    def test_backward(self):
        self.feed_map = {
            name: np.random.normal(size=(2, 3)).astype("float32")
            for name in ['X', 'Out']
        }
        self.fetch_list = [self.output_vars['X@GRAD']]
        exe = Executor(self.place)
        out = exe.run(self.program,
                      feed=self.feed_map,
                      fetch_list=self.fetch_list)
        # With no incoming Out@GRAD, the op must emit zero gradients.
        self.assertTrue(
            np.allclose(out[0],
                        np.zeros(shape=(2, 3)).astype("float32"),
                        rtol=1e-5))
def test_grad(self):
    """Gradient must flow through split_lod_tensor / merge_lod_tensor:
    mean() backward spreads 1/N per element, so x@GRAD sums to ~1."""
    place = core.CPUPlace()
    program = Program()
    with program_guard(program):
        x = layers.data(name='x',
                        shape=[1],
                        dtype='float32',
                        stop_gradient=False)
        y = layers.data(name='y',
                        shape=[1],
                        dtype='bool',
                        stop_gradient=False)
        level = 0
        out_true, out_false = split_lod_tensor(input=x, mask=y, level=level)
        out = merge_lod_tensor(in_true=out_true,
                               in_false=out_false,
                               mask=y,
                               x=x,
                               level=level)
        mean = layers.mean(out)
        append_backward(mean)
        tensor = core.LoDTensor()
        tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
        # three sequences of lengths 3, 6 and 1
        tensor.set_recursive_sequence_lengths([[3, 6, 1]])
        # one boolean mask entry per sequence
        mask_np = np.array([0, 1, 0]).astype('bool')
        mask_np = np.expand_dims(mask_np, axis=1)
        mask = core.LoDTensor()
        mask.set(mask_np, place)
        exe = Executor(place)
        scope = core.Scope()
        g_vars = program.global_block().var(x.name + "@GRAD")
        g_out = [
            item.sum() for item in map(
                np.array,
                exe.run(program,
                        feed={
                            'x': tensor,
                            'y': mask
                        },
                        fetch_list=[g_vars],
                        scope=scope,
                        return_numpy=False))
        ]
        g_out_sum = np.array(g_out).sum()
        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
def test_parse_program_from_string(self):
    """Round-trip a one-op program through desc.serialize_to_string /
    Program.parse_from_string and print both for manual comparison."""
    prog = Program()
    x = prog.global_block().create_var(
        name='X', shape=[1000, 784], dtype='float32')
    y = prog.global_block().create_var(
        name='Y', shape=[784, 100], dtype='float32')
    out = prog.global_block().create_var(name='Out', dtype='float32')
    prog.global_block().append_op(
        type="mul", inputs={'X': [x],
                            'Y': [y]}, outputs={'Out': [out]})
    binary_str = prog.desc.serialize_to_string()
    prog_restored = Program.parse_from_string(binary_str)
    print(prog)
    print(prog_restored)
def load_whole_program(program_input):
    """Load a startup/main program pair from `program_input` and restore
    parameter metadata from the plain-text sidecar files.

    Sidecar formats (one entry per line, trailing newline stripped):
      - para_info: "<name>:<True|False>" (the trainable flag)
      - stop_gradient: "<variable name>"

    Returns:
        (new_startup, new_main)
    """
    with open(program_input + '/startup_program', "rb") as fin:
        new_startup = Program().parse_from_string(fin.read())
    with open(program_input + '/main_program', "rb") as fin:
        new_main = Program().parse_from_string(fin.read())
    para_list = []
    with open(program_input + '/para_info', 'r') as fin:
        for line in fin:
            current_para = {}
            # line[:-1] drops the trailing newline
            para = line[:-1].split(":")
            current_para["name"] = para[0]
            if para[1] == 'True':
                current_para['trainable'] = True
            else:
                current_para['trainable'] = False
            para_list.append(current_para)
    with open(program_input + '/stop_gradient', 'r') as fin:
        for line in fin:
            stop_name = line[:-1]
            stop_var = new_main.global_block().var(stop_name)
            stop_var.stop_gradient = True
    for item in para_list:
        # Re-class deserialized variables as Parameter so they are treated
        # as trainable weights again.
        main_para = new_main.global_block().var(item['name'])
        main_para.__class__ = Parameter
        main_para.regularizer = None
        main_para.optimize_attr = {'learning_rate': 1.0}
        main_para.trainable = item['trainable']
        main_para.is_distributed = False
        startup_para = new_startup.global_block().var(item['name'])
        startup_para.__class__ = Parameter
        startup_para.regularizer = None
        startup_para.optimize_attr = {'learning_rate': 1.0}
        startup_para.trainable = item['trainable']
        startup_para.is_distributed = False
    return new_startup, new_main
def test_program_clone(self):
    """Build a one-op (mul) program and print it next to its clone."""
    prog = Program()
    x = prog.global_block().create_var(name='X',
                                       shape=[1000, 784],
                                       dtype='float32')
    y = prog.global_block().create_var(name='Y',
                                       shape=[784, 100],
                                       dtype='float32')
    out = prog.global_block().create_var(name='Out', dtype='float32')
    prog.global_block().append_op(type="mul",
                                  inputs={
                                      'X': [x],
                                      'Y': [y]
                                  },
                                  outputs={'Out': [out]})
    # FIXME(yuyang18): We manual compare the output string, since the order
    # of variable could be changed.
    print(prog)
    print(prog.clone())
def _calc_output(self, place, parallel=False, no_check_set=None, loss=None):
    """Run the tested operator's program on `place` and return
    (outputs, fetch_list) as LoDTensors.

    Args:
        place: fluid place to run on.
        parallel (bool): use ParallelExecutor instead of Executor.
        no_check_set: output names to exclude from fetching.
        loss: optional loss variable for ParallelExecutor.
    """
    program = Program()
    block = program.global_block()
    self._append_ops(block)
    inputs = self._get_inputs(block)
    outputs = self._get_outputs(block)
    feed_map = self.feed_var(inputs, place)
    if parallel:
        # BUG FIX: isinstance() requires the class as its second argument;
        # `isinstance(place, fluid.CUDAPlace(0))` passed an *instance*
        # and raised TypeError whenever parallel=True.
        use_cuda = isinstance(place, fluid.CUDAPlace)
        if loss:
            executor = fluid.ParallelExecutor(use_cuda=use_cuda,
                                              loss_name=loss.name,
                                              main_program=program)
        else:
            executor = fluid.ParallelExecutor(use_cuda=use_cuda,
                                              main_program=program)
    else:
        executor = Executor(place)
    fetch_list = getattr(self, "fetch_list", [])
    # if the fetch_list is customized by user, we use it directly.
    # if not, fill the fetch_list by the user configured outputs in test.
    if len(fetch_list) == 0:
        for var_name, var in six.iteritems(outputs):
            if no_check_set is not None and var_name in no_check_set:
                continue
            if isinstance(var, list):
                for v in var:
                    fetch_list.append(v)
            else:
                fetch_list.append(var)
    # if the fetch_list still empty, fill the fetch_list by the operator output.
    if len(fetch_list) == 0:
        for out_name, out_dup in Operator.get_op_outputs(self.op_type):
            fetch_list.append(str(out_name))
    # resolve name strings into Variables before running
    if not isinstance(fetch_list[0], fluid.framework.Variable):
        fetch_list = list(map(block.var, fetch_list))
    outs = executor.run(program,
                        feed=feed_map,
                        fetch_list=fetch_list,
                        return_numpy=False)
    return outs, fetch_list
def __save_remote_params(executor, dirname, remote_params_map):
    """
    Receive params from the parameter servers through RPC.
    If a param is sliced across servers, the slices are concatenated into
    one tensor before it is saved.
    """
    if not remote_params_map:
        return
    prog = Program()
    block = prog.global_block()
    # recv optimize vars from pserver
    for name, remote_params in remote_params_map.items():
        origin = remote_params[0].origin
        is_slice = remote_params[0].is_slice
        slices = [None] * len(remote_params)
        slice_varnames = [None] * len(remote_params)
        remote_varnames = [None] * len(remote_params)
        endpoints = [None] * len(remote_params)
        for idx, optimizer in enumerate(remote_params):
            block_id = optimizer.block_id
            slice = optimizer.slice
            endpoint = optimizer.endpoint
            # sliced params are ordered by their block id on the pserver;
            # unsliced ones just keep enumeration order
            index = block_id if is_slice else idx
            slices[index] = slice
            slice_varnames[index] = "{}.slice.{}".format(slice.name, idx)
            remote_varnames[index] = slice.name
            endpoints[index] = endpoint
        # shapes serialized as comma-joined strings, one per slice
        slice_shapes = []
        for slice in slices:
            tmp = [str(dim) for dim in slice.shape]
            slice_shapes.append(",".join(tmp))
        # recv_save fetches every slice over RPC and writes the combined
        # result to dirname/<origin name>
        block.append_op(
            type='recv_save',
            attrs={
                "trainer_id": 0,
                "shape": origin.shape,
                "slice_shapes": slice_shapes,
                "slice_varnames": slice_varnames,
                "remote_varnames": remote_varnames,
                "endpoints": endpoints,
                "file_path": os.path.join(dirname, origin.name)
            })
    executor.run(prog)
def test_parse_program_from_string(self):
    """Round-trip a one-op program through desc.serialize_to_string /
    Program.parse_from_string and print both for manual comparison."""
    prog = Program()
    x = prog.global_block().create_var(name='X',
                                       shape=[1000, 784],
                                       dtype='float32')
    y = prog.global_block().create_var(name='Y',
                                       shape=[784, 100],
                                       dtype='float32')
    out = prog.global_block().create_var(name='Out', dtype='float32')
    prog.global_block().append_op(type="mul",
                                  inputs={
                                      'X': [x],
                                      'Y': [y]
                                  },
                                  outputs={'Out': [out]})
    binary_str = prog.desc.serialize_to_string()
    prog_restored = Program.parse_from_string(binary_str)
    print(prog)
    print(prog_restored)
def _load_sparse_params(self,
                        executor,
                        dirname,
                        varnames,
                        main_program=None):
    """Load the SelectedRows (sparse) parameters named in `varnames` from
    `dirname` via per-server sparse_tensor_load ops.

    Args:
        executor: executor that runs the generated load program.
        dirname (str): directory the sparse tensors were saved to.
        varnames: names of the sparse variables to load.
        main_program: defaults to self.origin_main_program.

    Raises:
        ValueError: if a variable's file is missing under `dirname`.
    """
    # BUG FIX: the original `assert vars != None` tested the *builtin*
    # vars() function, which is always truthy; validate the argument.
    assert varnames is not None
    check_vars = []
    load_prog = Program()
    load_block = load_prog.global_block()

    def _in_varnames(var):
        return var.name in varnames

    load_vars = list(
        filter(_in_varnames, fluid.default_main_program().list_vars()))
    if main_program is None:
        main_program = self.origin_main_program
    from paddle.fluid.incubate.fleet.parameter_server.ir.public import _get_varname_parts
    for each_var in load_vars:
        assert isinstance(each_var, Variable)
        # saved file name uses the variable's origin (unsliced) name
        origin_varname, _, _ = _get_varname_parts(each_var.name)
        new_var = fluid.io._clone_var_in_block_(load_block, each_var)
        var_path = os.path.join(dirname, origin_varname)
        if not os.path.exists(var_path):
            raise ValueError("SelectedRows var {} can not find at {}".format(
                new_var.name, var_path))
        if os.path.isfile(var_path):
            load_block.append_op(type='sparse_tensor_load',
                                 inputs={},
                                 outputs={'Out': [new_var]},
                                 attrs={
                                     'file_path':
                                     os.path.join(dirname, origin_varname),
                                     'node_index':
                                     self.role_maker._server_index(),
                                     'node_num':
                                     self.role_maker._server_num(),
                                     'shape': each_var.shape
                                 })
            check_vars.append(each_var)
    executor.run(load_prog)
def get_parameter_value(para, executor):
    """
    Get the LoDTensor for the parameter

    :param executor: executor for retrieving the value
    :param para: the given parameter
    :return: the LoDTensor for the parameter
    """
    assert is_parameter(para)
    # Fetching a clone of the parameter from an otherwise empty program
    # pulls its current value out of the executor's scope.
    get_program = Program()
    block = get_program.global_block()
    new_var = _clone_var_in_block_(block, para)
    return executor.run(get_program, feed={}, fetch_list=[new_var])[0]
def load_persistable_nodes(executor, dirname, graph):
    """
    Load persistable node values from the given directory by the executor.

    Args:
        executor(Executor): The executor to run for loading node values.
        dirname(str): The directory path.
        graph(IrGraph): All the required persistable nodes in the graph will
            be loaded.
    """
    # Deduplicate by name: the graph may contain multiple nodes that refer
    # to the same persistable variable.
    persistable_node_names = set()
    persistable_nodes = []
    all_persistable_nodes = graph.all_persistable_nodes()
    for node in all_persistable_nodes:
        name = cpt.to_text(node.name())
        if name not in persistable_node_names:
            persistable_node_names.add(name)
            persistable_nodes.append(node)
    program = Program()
    var_list = []

    def _exist(var):
        # a node is loadable only if its file exists under dirname
        return os.path.exists(os.path.join(dirname, var.name))

    def _load_var(name, scope):
        return np.array(scope.find_var(name).get_tensor())

    def _store_var(name, array, scope, place):
        tensor = scope.find_var(name).get_tensor()
        tensor.set(array, place)

    for node in persistable_nodes:
        var_desc = node.var()
        # RAW / READER variables carry no tensor data and cannot be loaded.
        if var_desc.type() == core.VarDesc.VarType.RAW or \
                var_desc.type() == core.VarDesc.VarType.READER:
            continue
        # Mirror the node's descriptor into a fresh program so load_vars
        # can find it.
        var = program.global_block().create_var(
            name=var_desc.name(),
            shape=var_desc.shape(),
            dtype=var_desc.dtype(),
            type=var_desc.type(),
            lod_level=var_desc.lod_level(),
            persistable=var_desc.persistable())
        if _exist(var):
            var_list.append(var)
        else:
            # best-effort: missing files are logged, not fatal
            _logger.info("Cannot find the var %s!!!" % (node.name()))
    fluid.io.load_vars(executor=executor, dirname=dirname, vars=var_list)
def _get_gradient(self,
                  input_to_check,
                  place,
                  output_names,
                  no_grad_set,
                  parallel=False):
    """Run the backward pass of the tested MPC op across all parties and
    return the reconstructed plaintext gradients of `input_to_check`.

    Note: `parallel` is accepted for signature compatibility but unused.
    """
    prog = Program()
    block = prog.global_block()
    self._append_ops(block)
    loss = append_loss_ops(block, output_names)
    param_grad_list = append_backward(loss=loss,
                                      parameter_list=input_to_check,
                                      no_grad_set=no_grad_set)
    inputs = self._get_inputs(block)
    feed_dict = self.feed_var(inputs, place)
    fetch_list = [g for p, g in param_grad_list]
    # Manager().list() lets each party's subprocess report its share back.
    return_results = [Manager().list() for _ in range(len(fetch_list))]

    def closure(**kwargs):
        # runs once per party, with its MPC role
        role = kwargs['role']
        pfl_mpc.init("privc", role, "localhost", self.server, int(self.port))
        #init_op = fluid.default_main_program().global_block().ops[0]
        #_insert_init_op(program, init_op)
        executor = Executor(place)
        executor.run()
        outs = executor.run(prog, feed=feed_dict, fetch_list=fetch_list)
        for idx in range(len(fetch_list)):
            return_results[idx].append(outs[idx])

    ret = self.multi_party_run(target=closure)
    self.assertEqual(ret[0], True)
    outs = []
    for idx in range(len(fetch_list)):
        # combine the parties' secret shares into the plaintext gradient
        outs.append(self.reconstruct(np.array(return_results[idx])))
    return outs
def _save_lookup_tables_by_notify(executor, dirname, lookup_table,
                                  pserver_endpoints):
    """
    This function will send checkpoint notify message from Trainer 0
    to all the pservers.
    The checkpoint notify message contains lookup table name,
    the absolute path on pserver to save lookup_table.

    Args:
        executor(Executor): The executor to run for send checkpoint notify.
        dirname(str): The folder where to save.
        lookup_table(string): the lookup table name, when use distribute
            lookup table, we can get lookup table name by DistributeTranspiler.
            table_name
        ps_endpoint_list(list): the parameter server ip:port list.
            when use distribute lookup table, we can get ps_endpoint_list by
            distribute arguments.
    Return:
        None

    Examples:
        .. code-block:: python

            exe = fluid.Executor(fluid.CPUPlace())
            param_path = "./my_paddle_model"
            table_name = "share_w"
            ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]

            _save_pserver_vars_by_notify(executor=exe,
                    dirname=param_path, lookup_table=table_name,
                    pserver_endpoints=ps_endpoints)
    """
    # A one-op program: checkpoint_notify broadcasts the save request to
    # every endpoint in 'epmap'.
    pserver_notify_program = Program()
    pserver_notify_block = pserver_notify_program.global_block()
    attrs = {}
    attrs['epmap'] = pserver_endpoints
    attrs['dir'] = dirname
    attrs['lookup_table'] = lookup_table
    pserver_notify_block.append_op(type='checkpoint_notify',
                                   inputs={},
                                   outputs={},
                                   attrs=attrs)
    executor.run(pserver_notify_program)
def _save_distributed_params(self, executor, dirname, context, mode):
    """Ask the parameter servers to persist every distributed parameter in
    `context` into `dirname` via checkpoint_notify ops.

    Returns:
        The saved parameter names (a dict-keys view of `context`).
    """
    prog = Program()
    block = prog.global_block()
    for name, var_ctx in context.items():
        # one notify op per parameter; the remote side writes the slices
        block.append_op(type='checkpoint_notify',
                        attrs={
                            "varname": name,
                            "mode": mode,
                            "slice_varnames": var_ctx.split_varnames(),
                            "remote_varnames": var_ctx.split_varnames(),
                            "endpoints": var_ctx.split_endpoints(),
                            "dirname": dirname
                        })
    executor.run(prog)
    # NOTE(review): under Python 3 this is a dict-keys view, not a list —
    # confirm callers do not index into it.
    return context.keys()
def _load_slice_up_vars(executor, dirname, slice_vars_and_attrs):
    """Load full saved variables and carve each back into its slice var.

    Each entry of `slice_vars_and_attrs` is (orig_var, start, slice_var):
    the persisted `orig_var` is loaded from `dirname`, then rows
    [start, start + slice_var.shape[0]) are copied into `slice_var`.
    """
    if not slice_vars_and_attrs:
        return
    load_prog = Program()
    load_block = load_prog.global_block()
    need_delete_vars = []
    for var_tuple in slice_vars_and_attrs:
        orig_var = var_tuple[0]
        start = var_tuple[1]
        slice_var = var_tuple[2]
        end = start + slice_var.shape[0]
        clone_orig_var = load_block.create_var(name=orig_var.name,
                                               type=orig_var.type,
                                               shape=orig_var.shape,
                                               dtype=orig_var.dtype,
                                               persistable=True)
        clone_slice_var = load_block.create_var(name=slice_var.name,
                                                type=slice_var.type,
                                                shape=slice_var.shape,
                                                dtype=slice_var.dtype,
                                                persistable=True)
        load_block.append_op(
            type='load',
            inputs={},
            outputs={'Out': [clone_orig_var]},
            attrs={'file_path': os.path.join(dirname, clone_orig_var.name)})
        # slice along axis 0 only
        load_block.append_op(type="slice",
                             inputs={'Input': clone_orig_var},
                             outputs={'Out': clone_slice_var},
                             attrs={
                                 'axes': [0],
                                 'starts': [start],
                                 'ends': [end]
                             })
        # the full tensors are only temporaries; free them afterwards
        need_delete_vars.append(clone_orig_var)
    load_block.append_op(
        type='delete_var',
        inputs={'X': need_delete_vars},
    )
    executor.run(load_prog)
def _construct_grad_program_from_forward(self, fwd_program, grad_op_desc,
                                         op_grad_to_var):
    """Generate grad_program which contains the grad_op.

    Args:
        fwd_program (tuple): The program that contains grad_op_desc's
            corresponding forward op.
        grad_op_desc (OpDesc): The OpDesc of grad op.
        op_grad_to_var (dict): The relation of variables in grad op and
            its forward op.

    Returns:
        grad_program (program): The program which contains the grad_op.
    """
    grad_program = Program()
    grad_block = grad_program.global_block()
    new_op_desc = grad_block.desc.append_op()
    new_op_desc.copy_from(grad_op_desc)
    grad_program._sync_with_cpp()

    # Create grad vars based on fwd vars (shape and dtype)
    for arg in grad_op_desc.input_arg_names(
    ) + grad_op_desc.output_arg_names():
        # map the grad arg back to its forward var; fall back to the arg
        # name itself when no mapping exists
        fwd_var_name = op_grad_to_var.get(arg, None)
        if fwd_var_name is None:
            fwd_var_name = arg
        fwd_var = fwd_program.global_block().vars.get(fwd_var_name)
        assert fwd_var is not None, "{} cannot be found".format(fwd_var_name)
        grad_var = grad_block.create_var(name=arg,
                                         dtype=fwd_var.dtype,
                                         shape=fwd_var.shape,
                                         type=fwd_var.type,
                                         persistable=False)
        # Some variables' tensors hold no buffer (tensor's _holder is NULL), like XShape in reshape2 op,
        # and the shapes of those variables contain 0 (eg. Xshape.shape = [0, 2, 5]).
        # Set persistable for those variables in order to get them from global_scope for inplace grad test directly other than feed them,
        # since feed op calls check_memory_size() which fails when tensor's holder_ is NULL.
        if 0 in grad_var.shape:
            grad_var.persistable = True
    grad_program._sync_with_cpp()
    return grad_program
def __save_distributed_lookup_tables(executor, dirname,
                                     distributed_lookup_table, endpoints):
    """
    because the distributed lookup table may too huge to merge and save at
    one place, it will be saved at parameter server independent
    respectively. the save directory is dirname/"__lookup_table__".
    """
    prog = Program()
    block = prog.global_block()

    # if there is lookup table, the trainer 0 will notify all pserver to save.
    lookup_table_filename = os.path.join(dirname, "__lookup_table__")
    attrs = {}
    attrs['epmap'] = endpoints
    attrs['dir'] = lookup_table_filename
    attrs['lookup_table'] = distributed_lookup_table
    # one checkpoint_notify op fans out to every endpoint in 'epmap'
    block.append_op(
        type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs)
    executor.run(prog)
def _save_dense_params(self, executor, dirname, context, main_program):
    """Pull the latest dense params (and their optimizer status vars) from
    the parameter servers via recv_save ops and write them to `dirname`.

    Returns:
        list: the names of the saved local variables.
    """
    # flush pending updates so the fetched values are current
    self._communicator.recv()
    prog = Program()
    block = prog.global_block()
    local_vars = []
    for name, var_ctx in context.items():
        if len(var_ctx.origin_varnames()) != 1:
            raise ValueError("Dense can not support split now.")
        varname = var_ctx.origin_varnames()[0]
        local_vars.append(varname)
        optimizer = self._get_optimizer_op(varname)
        reshaped_varnames, origin_varnames = self._get_optimizer_status(
            optimizer.type, varname)
        # save the parameter itself plus all of its optimizer state vars
        for var_name in [varname] + reshaped_varnames + origin_varnames:
            var = self.origin_main_program.global_block().vars[var_name]
            block.append_op(
                type='recv_save',
                attrs={
                    "trainer_id": self.role_maker._worker_index(),
                    "shape": var.shape,
                    # dense vars are unsliced: one slice spanning the
                    # whole shape, serialized as "d0,d1,..."
                    "slice_shapes": [",".join([str(i) for i in var.shape])],
                    "slice_varnames": [var.name],
                    "remote_varnames": [var.name],
                    "is_sparse": False,
                    "endpoints": var_ctx.split_endpoints(),
                    "file_path": os.path.join(dirname, var.name)
                })
    executor.run(prog)
    return local_vars
def save_vars(executor,
              dirname,
              main_program=None,
              vars=None,
              predicate=None,
              filename=None):
    """
    Save variables to directory by executor.

    :param executor: executor that save variable
    :param dirname: directory path
    :param main_program: program. If vars is None, then filter all variables
        in this program which fit `predicate`. Default default_main_program.
    :param predicate: The Predicate describes a callable that returns a
        variable as a bool. If it returns true, the corresponding input
        variable will be saved.
    :param vars: variables need to be saved. If vars is specified, program &
        predicate will be ignored
    :param filename: The name of a single file that all vars are saved to.
        If it is None, save variables to separate files.

    :return: None
    """
    if vars is None:
        if main_program is None:
            main_program = default_main_program()
        if not isinstance(main_program, Program):
            raise TypeError("program should be as Program type or None")
        # recurse with an explicit var list filtered by the predicate
        save_vars(
            executor,
            dirname=dirname,
            vars=filter(predicate, main_program.list_vars()),
            filename=filename)
    else:
        save_program = Program()
        save_block = save_program.global_block()
        save_var_map = {}
        for each_var in vars:
            # NOTE: don't save the variable which type is RAW
            if each_var.type == core.VarDesc.VarType.RAW:
                continue
            new_var = _clone_var_in_block_(save_block, each_var)
            if filename is None:
                # one 'save' op (and file) per variable
                save_block.append_op(
                    type='save',
                    inputs={'X': [new_var]},
                    outputs={},
                    attrs={'file_path': os.path.join(dirname, new_var.name)})
            else:
                save_var_map[new_var.name] = new_var
        if filename is not None:
            # combined save: sort by name for a deterministic file layout
            save_var_list = []
            for name in sorted(save_var_map.keys()):
                save_var_list.append(save_var_map[name])
            save_block.append_op(
                type='save_combine',
                inputs={'X': save_var_list},
                outputs={},
                attrs={'file_path': os.path.join(dirname, filename)})
        executor.run(save_program)
def _get_gradient(self, input_to_check, place, output_names, no_grad_set):
    """Compute analytic gradients of the op under test.

    Builds a fresh program containing the op, reduces its outputs to a
    scalar loss (mean of each output, averaged when there are several),
    appends backward ops, and runs the program to fetch the gradients.

    Args:
        input_to_check: parameter names whose gradients are fetched.
        place: device place to execute on.
        output_names: names of op outputs folded into the loss.
        no_grad_set: variable names excluded from gradient computation.

    Returns:
        A list of numpy arrays, one gradient per entry of input_to_check.
    """
    prog = Program()
    block = prog.global_block()
    inputs_with_np = {
        key: value
        for (key, value) in OpTest._create_var_descs_(
            block, getattr(self, 'inputs', {}))
    }
    outputs_with_np = {
        key: val
        for (key, val) in OpTest._create_var_descs_(
            block, getattr(self, 'outputs', {}))
    }
    inputs = {
        k: [item[0] for item in inputs_with_np[k]]
        for k in inputs_with_np
    }
    outputs = {
        k: [item[0] for item in outputs_with_np[k]]
        for k in outputs_with_np
    }
    op = block.append_op(
        type=self.op_type,
        inputs=inputs,
        outputs=outputs,
        attrs=getattr(self, 'attrs', {}))

    # infer variable type and infer shape in compile-time
    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)

    # BUGFIX: materialize the map() result; under Python 3 a map object
    # has no len() and can only be consumed once.
    mean_inputs = list(map(block.var, output_names))

    if len(mean_inputs) == 1:
        # single output: its mean is the loss directly
        loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[1])
        op = block.append_op(
            inputs={"X": mean_inputs}, outputs={"Out": loss}, type='mean')
        op.desc.infer_var_type(block.desc)
        op.desc.infer_shape(block.desc)
    else:
        # several outputs: loss = sum(mean(out_i)) / num_outputs
        avg_sum = []
        for cur_loss in mean_inputs:
            cur_avg_loss = block.create_var(dtype=cur_loss.dtype, shape=[1])
            op = block.append_op(
                inputs={"X": [cur_loss]},
                outputs={"Out": [cur_avg_loss]},
                type="mean")
            op.desc.infer_var_type(block.desc)
            op.desc.infer_shape(block.desc)
            avg_sum.append(cur_avg_loss)

        loss_sum = block.create_var(dtype=avg_sum[0].dtype, shape=[1])
        op_sum = block.append_op(
            inputs={"X": avg_sum}, outputs={"Out": loss_sum}, type='sum')
        op_sum.desc.infer_var_type(block.desc)
        op_sum.desc.infer_shape(block.desc)

        loss = block.create_var(dtype=loss_sum.dtype, shape=[1])
        op_loss = block.append_op(
            inputs={"X": loss_sum},
            outputs={"Out": loss},
            type='scale',
            attrs={'scale': 1.0 / float(len(avg_sum))})
        op_loss.desc.infer_var_type(block.desc)
        op_loss.desc.infer_shape(block.desc)

    param_grad_list = append_backward(
        loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)

    # feed every numpy input (with its lod) as a LoDTensor
    feed_dict = {
        item[0].name: OpTest._numpy_to_lod_tensor(item[1], item[2], place)
        for p_name in inputs_with_np for item in inputs_with_np[p_name]
    }

    fetch_list = [g for p, g in param_grad_list]
    executor = Executor(place)
    # BUGFIX: wrap in list() so Python 3 callers get a reusable sequence,
    # matching Python 2's eager map() behavior.
    return list(
        map(np.array,
            executor.run(prog, feed_dict, fetch_list, return_numpy=False)))
def load_vars(executor,
              dirname,
              main_program=None,
              vars=None,
              predicate=None,
              filename=None):
    """
    Load variables from directory by executor.

    :param executor: executor that load variable
    :param dirname: directory path
    :param main_program: program. If vars is None, then filter all variables in this
    program which fit `predicate`. Default default_main_program().
    :param predicate: The Predicate describes a callable that returns a variable
    as a bool. If it returns true, the corresponding input variable will be loaded.
    :param vars: variables need to be loaded. If vars is specified, program &
    predicate will be ignored
    :param filename: The name of the single file that all vars are loaded from.
        If it is None, load variables from separate files.

    :return: None
    """
    if vars is None:
        # filter mode: resolve the program, then recurse with the concrete
        # variable list
        if main_program is None:
            main_program = default_main_program()
        if not isinstance(main_program, Program):
            raise TypeError("program's type should be Program")

        load_vars(
            executor,
            dirname=dirname,
            vars=filter(predicate, main_program.list_vars()),
            filename=filename)
        return

    load_prog = Program()
    load_block = load_prog.global_block()
    combined = {}

    for each_var in vars:
        assert isinstance(each_var, Variable)
        # RAW variables carry no persisted data
        if each_var.type == core.VarDesc.VarType.RAW:
            continue
        cloned = _clone_var_in_block_(load_block, each_var)
        if filename is None:
            # one `load` op (and one file) per variable
            load_block.append_op(
                type='load',
                inputs={},
                outputs={'Out': [cloned]},
                attrs={'file_path': os.path.join(dirname, cloned.name)})
        else:
            combined[cloned.name] = cloned

    if filename is not None:
        # single combined file: vars sorted by name, mirroring save_vars
        ordered = [combined[name] for name in sorted(combined)]
        load_block.append_op(
            type='load_combine',
            inputs={},
            outputs={"Out": ordered},
            attrs={'file_path': os.path.join(dirname, filename)})

    executor.run(load_prog)
class TestDyRnnStaticInput(unittest.TestCase):
    """Checks DynamicRNN's static_input against a NumPy reference.

    A static input is not stepped through time.  The test verifies both the
    per-step values the RNN observes for it (test_step_out) and the gradient
    flowing back into it, compared against central finite differences
    (test_network_gradient).
    """

    def setUp(self):
        # perturbation size for the numeric gradient check
        self._delta = 0.005
        self._max_sequence_len = 3
        self._program = Program()
        switch_main_program(self._program)
        self.output_dim = 10
        self.place = core.CPUPlace()
        self.prepare_x_tensor()
        self.prepare_static_input_tensor()
        self.exe = fluid.Executor(self.place)

    def prepare_x_tensor(self):
        # stepped RNN input: offset-style LoD -> 3 sequences of len 2, 1, 3
        self.x_tensor_dim = 10
        lod = [[0, 2, 3, 6]]
        shape = [lod[0][-1], self.x_tensor_dim]
        self.x_tensor_data = np.random.random(shape).astype('float32')
        self.x_tensor = core.LoDTensor()
        self.x_tensor.set_lod(lod)
        self.x_tensor.set(self.x_tensor_data, self.place)

    def prepare_static_input_tensor(self):
        # static (non-stepped) input: 3 sequences of len 1, 2, 3
        self.static_input_tensor_dim = 4
        lod = [[0, 1, 3, 6]]
        shape = [lod[0][-1], self.static_input_tensor_dim]
        self.static_input_data = np.random.random(shape).astype('float32')
        self.static_input_tensor = core.LoDTensor()
        self.static_input_tensor.set_lod(lod)
        self.static_input_tensor.set(self.static_input_data, self.place)

    def fetch_value(self, var):
        # Run the current program with both tensors fed and return `var`
        # converted to (ndarray, lod).
        fetch_outs = self.exe.run(feed={
            'x_tensor': self.x_tensor,
            'static_input_tensor': self.static_input_tensor
        },
                                  fetch_list=[var],
                                  return_numpy=False)
        return self._lodtensor_to_ndarray(fetch_outs[0])

    def _lodtensor_to_ndarray(self, lod_tensor):
        # Element-by-element copy of the tensor data; also returns its LoD.
        dims = lod_tensor.get_dims()
        ndarray = np.zeros(shape=dims).astype('float32')
        for i in xrange(np.product(dims)):
            ndarray.ravel()[i] = lod_tensor.get_float_element(i)
        return ndarray, lod_tensor.lod()

    def build_graph(self, only_forward=False):
        """Build the DynamicRNN graph.

        With only_forward=True, the static input seen at every step is also
        recorded into a LoDTensorArray and the per-step reads are returned.
        Otherwise backward ops are appended and (grad of static input,
        mean loss) is returned.
        """
        x_tensor = fluid.layers.data(
            name='x_tensor',
            shape=[self.x_tensor_dim],
            dtype='float32',
            lod_level=1)
        x_tensor.stop_gradient = False

        static_input_tensor = fluid.layers.data(
            name='static_input_tensor',
            shape=[self.static_input_tensor_dim],
            dtype='float32',
            lod_level=1)
        static_input_tensor.stop_gradient = False

        if only_forward:
            # array capturing what each step observed as its static input
            static_input_out_array = self._program.global_block().create_var(
                name='static_input_out_array',
                type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
                dtype='float32')
            static_input_out_array.stop_gradient = True

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            step_x = rnn.step_input(x_tensor)
            step_static_input = rnn.static_input(static_input_tensor)
            if only_forward:
                fluid.layers.array_write(
                    x=step_static_input,
                    i=rnn.step_idx,
                    array=static_input_out_array)
            last = fluid.layers.sequence_pool(
                input=step_static_input, pool_type='last')
            projected = fluid.layers.fc(input=[step_x, last],
                                        size=self.output_dim)
            rnn.output(projected)

        if only_forward:
            # read back the recorded static inputs, one per time step
            static_input_step_outs = []
            step_idx = fluid.layers.fill_constant(
                shape=[1], dtype='int64', value=0)
            step_idx.stop_gradient = True

            for i in xrange(self._max_sequence_len):
                step_out = fluid.layers.array_read(static_input_out_array,
                                                   step_idx)
                step_out.stop_gradient = True
                static_input_step_outs.append(step_out)
                fluid.layers.increment(x=step_idx, value=1.0, in_place=True)

        if only_forward:
            return static_input_step_outs

        last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
        loss = fluid.layers.mean(last)
        append_backward(loss)
        static_input_grad = self._program.global_block().var(
            framework.grad_var_name('static_input_tensor'))
        return static_input_grad, loss

    def get_seq_len_from_lod(self, lod):
        # offset-style LoD -> per-sequence lengths
        return [lod[0][i + 1] - lod[0][i] for i in xrange(len(lod[0]) - 1)]

    def get_expected_static_step_outs(self):
        """NumPy reference for what each RNN step should see.

        The reference reorders the static sequences by descending length of
        the *stepped* input (see the argsort below); step i then covers only
        the sequences still alive at that step.
        """
        x_lod = self.x_tensor.lod()
        x_seq_len = self.get_seq_len_from_lod(x_lod)
        x_seq_len_sorted = sorted(x_seq_len)
        x_sorted_indices = np.argsort(x_seq_len)[::-1]

        static_lod = self.static_input_tensor.lod()
        static_sliced = [
            self.static_input_data[static_lod[0][i]:static_lod[0][i + 1]]
            for i in xrange(len(static_lod[0]) - 1)
        ]
        static_seq_len = self.get_seq_len_from_lod(static_lod)
        static_reordered = []
        for i in xrange(len(x_sorted_indices)):
            static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist())
        static_seq_len_reordered = [
            static_seq_len[x_sorted_indices[i]]
            for i in xrange(len(x_sorted_indices))
        ]

        static_step_outs = []
        static_step_lods = []

        for i in xrange(self._max_sequence_len):
            # number of sequences whose (stepped) length is still > i
            end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1)
            lod = [0]
            # NOTE(review): the inner loop variable shadows the outer `i`;
            # harmless because the outer loop rebinds `i` each iteration,
            # but `i` must not be reused below this point in the body.
            for i in xrange(end):
                lod.append(static_seq_len_reordered[i] + lod[-1])
            static_step_lods.append([lod])
            end = lod[-1]
            static_step_outs.append(
                np.array(static_reordered[:end]).astype('float32'))

        return static_step_outs, static_step_lods

    def test_step_out(self):
        # forward check: per-step static input matches the NumPy reference
        static_step_outs = self.build_graph(only_forward=True)
        self.exe.run(framework.default_startup_program())
        expected_outs, expected_lods = self.get_expected_static_step_outs()
        for i in xrange(self._max_sequence_len):
            step_out, lod = self.fetch_value(static_step_outs[i])
            self.assertTrue(np.allclose(step_out, expected_outs[i]))
            self.assertTrue(np.allclose(lod, expected_lods[i]))

    def test_network_gradient(self):
        # gradient check: analytic gradient vs. central finite differences
        static_input_grad, loss = self.build_graph()
        self.exe.run(framework.default_startup_program())

        actual_gradients, actual_lod = self.fetch_value(static_input_grad)

        static_input_shape = self.static_input_tensor.get_dims()
        numeric_gradients = np.zeros(shape=static_input_shape).astype('float32')
        # calculate numeric gradients one element at a time, restoring the
        # original value after each probe
        tensor_size = np.product(static_input_shape)
        for i in xrange(tensor_size):
            origin = self.static_input_tensor.get_float_element(i)
            x_pos = origin + self._delta
            self.static_input_tensor.set_float_element(i, x_pos)
            y_pos = self.fetch_value(loss)[0][0]
            x_neg = origin - self._delta
            self.static_input_tensor.set_float_element(i, x_neg)
            y_neg = self.fetch_value(loss)[0][0]
            self.static_input_tensor.set_float_element(i, origin)
            numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2
        self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001))
        self.assertTrue(np.allclose(actual_lod, self.static_input_tensor.lod()))
class RecurrentOpTest1(unittest.TestCase):
    '''
    Test RNNOp

    equation:
        h_t = ( x_t + h_{t-1} ) / scale
    vars:
        - x
    memories:
        - h
    outputs:
        - h
    '''

    # problem sizes shared by the graph and the NumPy reference model
    input_dim = 2
    batch_size = 1
    sent_len = 1

    def setup_program(self):
        # fresh main/startup programs per test; p_info is splatted into
        # every layer call so ops land in these programs
        self.main_program = Program()
        self.startup_program = Program()
        self.p_info = {
            "main_program": self.main_program,
            "startup_program": self.startup_program
        }
        self.place = core.CPUPlace()

    def setUp(self):
        self.setup_program()
        # names of the feed variables; each exists as an attribute of
        # py_rnn with the same name.
        # NOTE(review): this is a set — test_backward and
        # get_numerical_gradient both iterate it and rely on the iteration
        # order being consistent within one process run.
        self.data_field = {"x", "h_boot"}

        self.input_shape = (self.sent_len, self.batch_size, self.input_dim)
        self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
        self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape)

        # scalar loss = mean of the RNN output
        self.output = layers.mean(self.create_rnn_op(), **self.p_info)

    def create_rnn_op(self):
        # Build the StaticRNN graph implementing h_t = (x_t + h_{t-1}) / scale
        x = layers.data(
            shape=[self.sent_len, self.batch_size, self.input_dim],
            dtype='float32',
            name='x',
            append_batch_size=False,
            **self.p_info)
        x.stop_gradient = False
        h_boot = layers.data(
            shape=[self.input_dim],
            dtype='float32',
            name='h_boot',
            **self.p_info)
        h_boot.stop_gradient = False

        rnn = layers.StaticRNN(main_program=self.main_program)
        with rnn.step():
            h_pre = rnn.memory(init=h_boot)
            x_t = rnn.step_input(x)

            h = layers.scale(
                x=layers.elementwise_add(
                    x=h_pre, y=x_t, **self.p_info),
                scale=self.py_rnn.scale,
                **self.p_info)

            rnn.update_memory(h_pre, h)
            rnn.output(h)

        return rnn()

    def forward(self):
        # Run the graph with the reference model's current input values.
        self.feed_map = {
            x: create_tensor(getattr(self.py_rnn, x), self.place)
            for x in self.data_field
        }
        exe = Executor(self.place)
        out = exe.run(self.main_program,
                      feed=self.feed_map,
                      fetch_list=[self.output])
        return out[0]

    def backward(self):
        # Fetch the analytic gradient of every feed variable.
        self.feed_map = {
            x: create_tensor(getattr(self.py_rnn, x), self.place)
            for x in self.data_field
        }
        fetch_list = [
            self.main_program.global_block().var(grad_var_name(x))
            for x in self.data_field
        ]

        exe = Executor(self.place)
        return exe.run(self.main_program,
                       feed=self.feed_map,
                       fetch_list=fetch_list,
                       return_numpy=False)

    def test_backward(self):
        # analytic gradients must agree with numeric ones per feed var
        self.check_forward()
        append_backward(self.output)
        ana_grad = [np.array(x) for x in self.backward()]
        num_grad = self.get_numerical_gradient()
        for idx, name in enumerate(self.data_field):
            self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape)
            self.assertTrue(
                np.isclose(
                    num_grad[idx], ana_grad[idx], rtol=0.1).all())

    def check_forward(self):
        # forward result must match the NumPy reference implementation
        print 'test recurrent op forward'
        pd_output = self.forward()
        py_output = self.py_rnn.forward()
        print 'pd_output', pd_output
        print
        print 'py_output', py_output
        self.assertEqual(pd_output.shape, py_output.shape)
        self.assertTrue(np.isclose(pd_output, py_output, rtol=0.1).all())

    def get_numerical_gradient(self, delta=0.005):
        # Central finite differences of the scalar loss w.r.t. each element
        # of each feed variable; nditer writes the result into grad_list
        # in place.
        dloss_dout = 1.0  # NOTE(review): unused in this body
        feed_list = [getattr(self.py_rnn, x) for x in self.data_field]
        grad_list = [np.zeros_like(x) for x in feed_list]
        for feed, grad in zip(feed_list, grad_list):
            for f, g in np.nditer([feed, grad], op_flags=['readwrite']):
                o = float(f)
                f[...] = o + delta
                y_pos = self.forward()

                f[...] = o - delta
                y_neg = self.forward()

                # restore the original value before the next element
                f[...] = o
                dout_dfeed = (y_pos - y_neg) / (delta * 2)
                g[...] = dout_dfeed[0]

        return grad_list