def get_places(self):
    """Return the place configurations to test: 1- and 4-thread CPU setups,
    plus single- and dual-GPU setups when compiled with CUDA."""
    configurations = [fluid.cpu_places(1), fluid.cpu_places(4)]
    if fluid.is_compiled_with_cuda():
        configurations.append(fluid.cuda_places(0))
        configurations.append(fluid.cuda_places([0, 1]))
    return configurations
def default_exe_params(is_distributed, use_cuda, thread_num):
    """
    Set the default execute parameters.

    Args:
        is_distributed (bool): run with fleet distributed training.
        use_cuda (bool): run on GPU; distributed mode requires it.
        thread_num (int): CPU thread/place count (CPU mode only).

    Returns:
        dict with keys 'exe', 'trainer_num', 'trainer_id', 'gpu_id',
        'dist_strategy' and 'places'.
    """
    gpu_id = 0
    trainer_num = 1
    trainer_id = 0
    dist_strategy = None
    places = None
    if is_distributed:
        if use_cuda:
            role = role_maker.PaddleCloudRoleMaker(is_collective=True)
            fleet.init(role)
            # BUGFIX: FLAGS_selected_gpus may be unset when not launched via
            # paddle.distributed.launch; default to "0" instead of crashing
            # on int(None).
            gpu_id = int(os.getenv("FLAGS_selected_gpus", "0"))
            trainer_num = fleet.worker_num()
            trainer_id = fleet.worker_index()
            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.use_experimental_executor = True
            exec_strategy.num_threads = 4
            exec_strategy.num_iteration_per_drop_scope = 1
            dist_strategy = DistributedStrategy()
            dist_strategy.exec_strategy = exec_strategy
            dist_strategy.nccl_comm_num = 2
            dist_strategy.fuse_all_reduce_ops = True
            dist_strategy.forward_recompute = True
            # Automatic mixed precision with a fixed loss scaling.
            dist_strategy.use_amp = True
            dist_strategy.amp_loss_scaling = 12800.0
            places = fluid.cuda_places()
        else:
            print('Only gpu is supported for distributed mode at present.')
            exit(-1)
    else:
        if use_cuda:
            places = fluid.cuda_places()
        else:
            places = fluid.cpu_places(thread_num)
            # CPU_NUM controls how many CPU places parallel executors use.
            os.environ['CPU_NUM'] = str(thread_num)

    if use_cuda:
        exe = fluid.Executor(fluid.CUDAPlace(gpu_id))
    else:
        exe = fluid.Executor(fluid.CPUPlace())

    return {
        'exe': exe,
        'trainer_num': trainer_num,
        'trainer_id': trainer_id,
        'gpu_id': gpu_id,
        'dist_strategy': dist_strategy,
        'places': places
    }
def __init__(self, model_type): self.model_type = model_type # 现有的CV模型都有这个属性,而这个属且也需要在eval时用到 self.num_classes = None self.labels = None self.version = paddlex.__version__ if paddlex.env_info['place'] == 'cpu': self.places = fluid.cpu_places() else: self.places = fluid.cuda_places() self.exe = fluid.Executor(self.places[0]) self.train_prog = None self.test_prog = None self.parallel_train_prog = None self.train_inputs = None self.test_inputs = None self.train_outputs = None self.test_outputs = None self.train_data_loader = None self.eval_metrics = None # 若模型是从inference model加载进来的,无法调用训练接口进行训练 self.trainable = True # 是否使用多卡间同步BatchNorm均值和方差 self.sync_bn = False # 当前模型状态 self.status = 'Normal' # 已完成迭代轮数,为恢复训练时的起始轮数 self.completed_epochs = 0 self.scope = fluid.global_scope() # 线程池,在模型在预测时用于对输入数据以图片为单位进行并行处理 # 主要用于batch_predict接口 thread_num = mp.cpu_count() if mp.cpu_count() < 8 else 8 self.thread_pool = mp.pool.ThreadPool(thread_num)
def main(args):
    """Run one standalone evaluation pass over the validation split
    described by the loaded config."""
    config = get_config(args.config, overrides=args.override, show=True)
    use_gpu = config.get("use_gpu", True)
    if use_gpu:
        places = fluid.cuda_places()
    else:
        places = fluid.cpu_places()

    startup_prog = fluid.Program()
    valid_prog = fluid.Program()
    valid_dataloader, valid_fetchs = program.build(config,
                                                   valid_prog,
                                                   startup_prog,
                                                   is_train=False,
                                                   is_distributed=False)
    # Freeze the graph for inference-only execution.
    valid_prog = valid_prog.clone(for_test=True)

    executor = fluid.Executor(places[0])
    executor.run(startup_prog)
    init_model(config, valid_prog, executor)

    valid_reader = Reader(config, 'valid')()
    valid_dataloader.set_sample_list_generator(valid_reader, places)

    compiled_valid_prog = program.compile(config, valid_prog)
    # Epoch index -1 marks a standalone evaluation run.
    program.run(valid_dataloader, executor, compiled_valid_prog,
                valid_fetchs, -1, 'eval')
def main(args):
    """Train the Deepwalk skip-gram model across all visible GPUs."""
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    device_count = len(F.cuda_places())

    # Build the model graph first; forward() must run before the optimizer
    # ops are added by optimization().
    model = DeepwalkModel(args.num_nodes, args.hidden_size, args.neg_num,
                          False, False, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    # Steps shrink with device count; learning rate is scaled up to match.
    train_steps = int(args.num_nodes * args.epoch / args.batch_size /
                      device_count)
    optimization(args.lr * device_count, loss, train_steps, args.optimizer)

    exe = F.Executor(F.CUDAPlace(0))
    exe.run(F.default_startup_program())

    graph = build_graph(args.num_nodes, args.edge_path)
    pyreader.decorate_tensor_provider(build_gen_func(args, graph))
    pyreader.start()

    train_prog = F.default_main_program()
    if args.warm_start_from_dir is not None:
        F.io.load_params(exe, args.warm_start_from_dir, train_prog)

    train_exe = get_parallel_exe(train_prog, loss)
    train(train_exe, exe, train_prog, loss, pyreader, args, train_steps)
def __init__(self, model_type): self.model_type = model_type # 现有的CV模型都有这个属性,而这个属且也需要在eval时用到 self.num_classes = None self.labels = None self.version = paddlex.__version__ if paddlex.env_info['place'] == 'cpu': self.places = fluid.cpu_places() else: self.places = fluid.cuda_places() self.exe = fluid.Executor(self.places[0]) self.train_prog = None self.test_prog = None self.parallel_train_prog = None self.train_inputs = None self.test_inputs = None self.train_outputs = None self.test_outputs = None self.train_data_loader = None self.eval_metrics = None # 若模型是从inference model加载进来的,无法调用训练接口进行训练 self.trainable = True # 是否使用多卡间同步BatchNorm均值和方差 self.sync_bn = False # 当前模型状态 self.status = 'Normal' # 已完成迭代轮数,为恢复训练时的起始轮数 self.completed_epochs = 0
def _get_activations_from_ims(img, model, batch_size, dims, use_gpu,
                              premodel_path):
    """Run the InceptionV3 network over `img` in mini-batches and return
    per-image activations (used for FID-style statistics).

    Args:
        img (np.ndarray): stacked images, NCHW or NHWC, values in [0, 255].
        model: model spec forwarded to `_build_program`.
        batch_size (int): mini-batch size.
        dims (int): flattened activation dimensionality per image.
        use_gpu (bool): run on the first CUDA place when True.
        premodel_path (str): directory holding the 'paddle_inceptionv3' weights.

    Returns:
        np.ndarray of shape (len(img), dims).
    """
    n_batches = (len(img) + batch_size - 1) // batch_size
    n_used_img = len(img)
    pred_arr = np.empty((n_used_img, dims))

    # BUGFIX: program construction, executor creation and weight loading
    # used to sit inside the batch loop, rebuilding the graph and re-reading
    # the checkpoint once per batch. They are loop-invariant; hoist them.
    output, main_program, startup_program = _build_program(model)
    place = fluid.cuda_places()[0] if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)
    fluid.load(main_program,
               os.path.join(premodel_path, 'paddle_inceptionv3'), exe)

    for i in tqdm(range(n_batches)):
        start = i * batch_size
        end = start + batch_size
        if end > len(img):
            end = len(img)
        images = img[start:end]
        # Assumes NHWC input when dim 1 is not the 3-channel axis — TODO confirm.
        if images.shape[1] != 3:
            images = images.transpose((0, 3, 1, 2))
        # NOTE: in-place division also rescales the caller's array slice.
        images /= 255
        pred = exe.run(main_program,
                       feed={'images': images},
                       fetch_list=[output])[0]
        pred_arr[start:end] = pred.reshape(end - start, -1)
    return pred_arr
def evaluate(self, eval_dataset, eval_hooks=[]):
    """Evaluate the model over `eval_dataset` on a single card.

    Args:
        eval_dataset (Dataset): dataset to evaluate.
        eval_hooks (list): extra run hooks, appended after the default
            stop/eval hooks.

    Returns:
        The MonitoredExecutor result pair (steps, eval_result).

    Raises:
        ValueError: if `eval_dataset` is not a `Dataset`.
    """
    if not isinstance(eval_dataset, Dataset):
        raise ValueError(
            'expect dataset to be instance of Dataset, got %s' %
            repr(eval_dataset))

    program, model_spec = self.build_for_eval(eval_dataset)
    single_card_place = F.cuda_places()[0]
    eval_executor = F.Executor(single_card_place)

    # BUGFIX: the `eval_hooks` argument used to be overwritten by the
    # default hooks, silently discarding caller-supplied hooks. Mirror
    # train(), which extends the defaults with the user hooks.
    eval_run_hooks = [
        hooks.StopAtStepHook(self.run_config.eval_max_steps,
                             self.run_config.eval_max_steps),
        hooks.EvalHook(model_spec.metrics, )
    ]
    eval_run_hooks.extend(eval_hooks)

    mon_exe = MonitoredExecutor(eval_executor,
                                program,
                                run_config=self.run_config,
                                run_hooks=eval_run_hooks)
    mon_exe.init_or_restore_variables()
    try:
        with mon_exe:
            for data in eval_dataset.start(places=[single_card_place]):
                mon_exe.run(feed=data)
    except (StopException, F.core.EOFException) as e:
        # Normal termination: dataset exhausted or step limit reached.
        pass

    _, eval_result = mon_exe.result
    summary_writer = get_summary_writer(
        os.path.join(self.run_config.model_dir, 'eval_history'))
    log_eval_result('eval', eval_result, summary_writer, mon_exe.state)

    return mon_exe.result
def train(use_cuda):
    """Build the seq2seq training program and run the full training loop.

    Args:
        use_cuda (bool): train on all visible GPUs when True, else on CPU.
    """
    # define program
    train_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            # For training:
            # inputs = [src, src_sequence_length, trg, trg_sequence_length, label]
            inputs, loader = data_func(is_train=True)
            logits = model_func(inputs, is_train=True)
            # label is inputs[-1]; trg_sequence_length is inputs[-2].
            loss = loss_func(logits, inputs[-1], inputs[-2])
            optimizer = optimizer_func()
            optimizer.minimize(loss)

    # define data source
    places = fluid.cuda_places() if use_cuda else fluid.cpu_places()
    loader.set_batch_generator(inputs_generator(batch_size,
                                                eos_id,
                                                is_train=True),
                               places=places)

    exe = fluid.Executor(places[0])
    exe.run(startup_prog)
    # Data-parallel execution over all selected places.
    prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name)

    EPOCH_NUM = 20
    for pass_id in six.moves.xrange(EPOCH_NUM):
        batch_id = 0
        for data in loader():
            loss_val = exe.run(prog, feed=data, fetch_list=[loss])[0]
            print('pass_id: %d, batch_id: %d, loss: %f' %
                  (pass_id, batch_id, loss_val))
            batch_id += 1

    # Persist trained parameters for later inference / resume.
    fluid.io.save_params(exe, model_save_dir, main_program=train_prog)
def __init__(self,
             sent_emb_dim,
             word_emb_dim,
             sent_len,
             lr=0.001,
             bidirectional=False,
             dropout_prob=None,
             num_layers=1,
             use_gpu=True,
             emb_size_ratio=1.5):
    """Configure a SkipThoughts model.

    Args:
        sent_emb_dim (int): sentence embedding dimensionality.
        word_emb_dim (int): word embedding dimensionality.
        sent_len (int): sentence length, forwarded to the base class.
        lr (float): learning rate.
        bidirectional (bool): use a bidirectional encoder.
        dropout_prob (float|None): dropout probability, if any.
        num_layers (int): number of recurrent layers.
        use_gpu (bool): run on GPU when True, otherwise 8 CPU places.
        emb_size_ratio (float): embedding size scaling ratio.
    """
    super(SkipThoughts, self).__init__(sent_len=sent_len)

    # Architecture / training hyper-parameters.
    self.sent_emb_dim = sent_emb_dim
    self.word_emb_dim = word_emb_dim
    self.lr = lr
    self.bidirectional = bidirectional
    self.dropout_prob = dropout_prob
    self.num_layers = num_layers
    self.emb_size_ratio = emb_size_ratio

    # Execution placement: one device for the executor, possibly several
    # for the data loader.
    if use_gpu:
        self.place = fluid.CUDAPlace(0)
        self.dataloader_places = fluid.cuda_places()
    else:
        self.place = fluid.CPUPlace()
        self.dataloader_places = fluid.cpu_places(8)

    # Lifecycle flags.
    self.built = False
    self.test_emb_pin = 0
    self.init = False
    self.test_fitted = False
def predict(self, predict_dataset, ckpt=None, steps=-1, split_batch=True):
    '''
    Perform prediction: calls `model_fn` and initiates the user-specified
    model in `propeller.RunMode.PREDICT` mode.

    Args:
        predict_dataset (propeller.data.Dataset): should not `shuffle` or `repeat`
        ckpt: accepted for API compatibility; restore is handled by
            `init_or_restore_variables` from the run-config model_dir
        steps (int): steps to predict; if -1 is specified, stops when
            `StopException` is raised by `predict_dataset`
        split_batch (bool): if True, the prediction of each example in a
            batch is yielded separately

    Yields:
        Evaluated values of predictions tensors.
    '''
    if not isinstance(predict_dataset, Dataset):
        raise ValueError(
            'expect dataset to be instance of Dataset, got %s' %
            repr(predict_dataset))

    program, model_spec = self.build_for_predict(predict_dataset)
    # Prediction always runs on a single card.
    single_card_place = F.cuda_places()[0]
    executor = F.Executor(single_card_place)
    # NOTE(review): `steps if steps == -1 else None` passes -1 through and
    # discards any positive step count — the condition looks inverted;
    # confirm against RunConfig semantics before changing.
    pred_run_config = RunConfig(run_steps=steps if steps == -1 else None,
                                model_dir=self.run_config.model_dir)
    mon_exe = MonitoredExecutor(
        executor,
        program,
        run_config=pred_run_config, )
    mon_exe.init_or_restore_variables()
    try:
        with mon_exe:
            log.info('Runining predict from dir: %s' % repr(mon_exe.state))
            single_card_place = F.cuda_places()[0]
            for data in predict_dataset.start(places=[single_card_place]):
                res = mon_exe.run(fetch_list=model_spec.predictions,
                                  feed=data)
                if split_batch:
                    # Transpose [tensor][batch] into per-example tuples.
                    res = map(lambda i: i.tolist(), res)
                    res = zip(*res)  # transpose
                    for r in res:
                        yield r
                else:
                    yield list(map(lambda i: i.tolist(), res))
    except (StopException, F.core.EOFException) as e:
        # Normal termination: dataset exhausted or step limit reached.
        pass
def serve(model_dir, host, num_concurrent=None):
    """Serve an inference model over gRPC, one GPU-bound predictor per
    worker thread.

    Args:
        model_dir (str): directory of the saved inference model.
        host (str): address:port to bind the insecure gRPC port on.
        num_concurrent (int|None): worker thread count; defaults to the
            number of visible CUDA places (one thread per card).
    """
    if six.PY2:
        raise RuntimeError('propeller service work in python3 only')
    num_worker = len(
        F.cuda_places()) if num_concurrent is None else num_concurrent
    pool = ThreadPoolExecutor(num_worker)

    class Predictor(object):
        # One AnalysisConfig-based predictor pinned to a single GPU card.
        def __init__(self, did):
            log.debug('create predictor on card %d' % did)
            config = F.core.AnalysisConfig(model_dir)
            # 5000 MB initial GPU memory pool on card `did`.
            config.enable_use_gpu(5000, did)
            self._predictor = F.core.create_paddle_predictor(config)

        @profile('paddle')
        def __call__(self, args):
            # Inputs are positional; name them to match the feed order.
            for i, a in enumerate(args):
                a.name = 'placeholder_%d' % i
            res = self._predictor.run(args)
            return res

    # Maps worker thread -> its lazily created Predictor.
    predictor_context = {}

    class InferenceService(interface_pb2_grpc.InferenceServicer):
        @profile('service')
        def Infer(self, request, context):
            try:
                slots = request.slots
                current_thread = threading.current_thread()
                log.debug('%d slots received dispatch to thread %s' %
                          (len(slots), current_thread))
                if current_thread not in predictor_context:
                    # Derive the card id from the thread's position in the
                    # pool so each thread owns a distinct GPU.
                    did = list(pool._threads).index(current_thread)
                    log.debug('spawning worker thread %d' % did)
                    predictor = Predictor(did)
                    predictor_context[current_thread] = predictor
                else:
                    predictor = predictor_context[current_thread]
                slots = [serv_utils.slot_to_paddlearray(s) for s in slots]
                ret = predictor(slots)
                response = [serv_utils.paddlearray_to_slot(r) for r in ret]
            except Exception as e:
                log.exception(e)
                raise e
            return interface_pb2.Slots(slots=response)

    server = grpc.server(pool)
    interface_pb2_grpc.add_InferenceServicer_to_server(InferenceService(),
                                                       server)
    server.add_insecure_port(host)
    server.start()
    log.info('server started on %s...' % host)
    try:
        # Block forever; gRPC serves on the pool's worker threads.
        while True:
            sleep(100000)
    except KeyboardInterrupt as e:
        pass
    log.info('server stoped...')
def train_static_graph(epoch_num, use_multi_gpu):
    """Train a static-graph ResNet classifier and save an inference model.

    Args:
        epoch_num (int): number of training epochs.
        use_multi_gpu (bool): compile with data parallelism across all
            visible CUDA places when True.
    """
    resnet = ResNet()
    image = fluid.data(name='image',
                       shape=[None] + train_parameters['input_size'],
                       dtype='float32')
    label = fluid.data(name='label', shape=[None], dtype='int64')
    out = resnet(image)
    loss = fluid.layers.cross_entropy(out, label)
    avg_loss = fluid.layers.mean(loss)
    optimizer = optimizer_setting(train_parameters,
                                  parameter_list=resnet.parameters())
    optimizer.minimize(avg_loss)
    program = fluid.default_main_program()
    if use_multi_gpu:
        program = fluid.CompiledProgram(program).with_data_parallel(
            loss_name=avg_loss.name)
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    image_shape = train_parameters['input_size']
    reader = create_reader()

    def single_device_feed_reader(reader):
        # Wrap the raw sample reader so each yielded item is a feed dict
        # keyed by the graph's input variable names.
        def __impl__():
            for data in reader():
                image_np = np.array(
                    [np.reshape(x[0], image_shape) for x in data])
                label_np = np.array([x[1] for x in data])
                yield {image.name: image_np, label.name: label_np}

        return __impl__

    reader = single_device_feed_reader(reader)
    if use_multi_gpu:
        # Bundle one feed dict per device into a list — presumably the
        # list-of-dicts feed format expected by the data-parallel compiled
        # program; confirm against the CompiledProgram feed docs.
        reader = paddle.batch(reader,
                              batch_size=len(fluid.cuda_places()),
                              drop_last=True)
    for epoch_id in six.moves.range(epoch_num):
        for i, data in enumerate(reader()):
            avg_loss_val, = exe.run(program,
                                    feed=data,
                                    fetch_list=[avg_loss])
            if i % 10 == 0:
                print('Epoch {}, batch {}, avg_loss {}'.format(
                    epoch_id, i, avg_loss_val))
    fluid.io.save_inference_model('./infer_static_graph',
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe)
def main():
    """Evaluate an ERFNet lane-detection checkpoint over the validation list."""
    global args, best_mIoU
    args = parser.parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    if args.dataset == 'LaneDet':
        num_class = 20
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # get places
    places = fluid.cuda_places()

    with fluid.dygraph.guard():
        model = models.ERFNet(num_class, [576, 1024])
        input_mean = model.input_mean
        input_std = model.input_std

        if args.resume:
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint, _ = fluid.load_dygraph(args.resume)
            model.load_dict(checkpoint)
            print("=> checkpoint loaded successfully")
        else:
            print(("=> loading checkpoint '{}'".format(
                'trained/ERFNet_trained')))
            checkpoint, _ = fluid.load_dygraph('trained/ERFNet_trained')
            model.load_dict(checkpoint)
            print("=> default checkpoint loaded successfully")

        # Data loading code
        test_dataset = ds.LaneDataSet(
            dataset_path='datasets/PreliminaryData',
            data_list=args.val_list,
            transform=[
                lambda x: cv2.resize(x, (1024, 576)),
                # BUGFIX: operator precedence made the original compute
                # `x - (mean / std)` instead of the intended
                # standardisation `(x - mean) / std`; add parentheses.
                lambda x: (x - np.asarray(input_mean)[None, None, :]) /
                np.array(input_std)[None, None, :],
            ]
        )
        test_loader = DataLoader(
            test_dataset,
            places=places[0],
            batch_size=1,
            shuffle=False,
            num_workers=args.workers,
            collate_fn=collate_fn
        )

        ### evaluate ###
        mIoU = validate(test_loader, model)
        # print('mIoU: {}'.format(mIoU))
        return
def test_main(self):
    """Run run_network across each place configuration and every
    combination of the persistable/split flags."""
    place_configs = [fluid.cpu_places(4)]
    if fluid.is_compiled_with_cuda():
        place_configs.append(fluid.cuda_places())
    flag_values = (False, True)
    for config in place_configs:
        for persistable_flag in flag_values:
            for split_flag in flag_values:
                self.run_network(config,
                                 use_split=split_flag,
                                 has_persistable=persistable_flag)
def __init__(self,
             num_classes=2,
             use_bce_loss=False,
             use_dice_loss=False,
             class_weight=None,
             ignore_index=255,
             sync_bn=True):
    """Base initializer for a segmentation model.

    Args:
        num_classes (int): number of classes; bce/dice losses require 2.
        use_bce_loss (bool): use binary cross-entropy loss.
        use_dice_loss (bool): use dice loss.
        class_weight (list|str|None): per-class loss weights, or 'dynamic'.
        ignore_index (int): label value ignored by the loss.
        sync_bn (bool): synchronize BatchNorm statistics across cards.

    Raises:
        ValueError: bce/dice loss with more than two classes, mismatched
            class_weight length, or a string class_weight not 'dynamic'.
        TypeError: class_weight is neither a list nor a string.
    """
    # Captured for serialization / re-construction of the model.
    self.init_params = locals()
    if num_classes > 2 and (use_bce_loss or use_dice_loss):
        raise ValueError(
            "dice loss and bce loss is only applicable to binary classfication"
        )

    if class_weight is not None:
        if isinstance(class_weight, list):
            if len(class_weight) != num_classes:
                raise ValueError(
                    "Length of class_weight should be equal to number of classes"
                )
        elif isinstance(class_weight, str):
            if class_weight.lower() != 'dynamic':
                raise ValueError(
                    "if class_weight is string, must be dynamic!")
        else:
            raise TypeError(
                'Expect class_weight is a list or string but receive {}'.
                format(type(class_weight)))
    self.num_classes = num_classes
    self.use_bce_loss = use_bce_loss
    self.use_dice_loss = use_dice_loss
    self.class_weight = class_weight
    self.ignore_index = ignore_index
    self.sync_bn = sync_bn
    self.labels = None
    self.env_info = get_environ_info()
    # Choose execution places from the detected environment (CPU vs GPU).
    if self.env_info['place'] == 'cpu':
        self.places = fluid.cpu_places()
    else:
        self.places = fluid.cuda_places()
    # Executor bound to the first place; programs are created lazily later.
    self.exe = fluid.Executor(self.places[0])
    self.train_prog = None
    self.test_prog = None
    self.parallel_train_prog = None
    self.train_inputs = None
    self.test_inputs = None
    self.train_outputs = None
    self.test_outputs = None
    self.train_data_loader = None
    self.eval_metrics = None
    # Current model status.
    self.status = 'Normal'
def get_data_run_places(args):
    """Determine the places on which the data layer (dataloader) runs.

    Args:
        args (dict): expects keys "use_parallel", "use_gpu" and
            "num_of_device".

    Returns:
        list of fluid places for the dataloader.
    """
    USE_PARALLEL = args["use_parallel"]
    USE_GPU = args["use_gpu"]
    NUM_OF_DEVICE = args["num_of_device"]

    if USE_PARALLEL and NUM_OF_DEVICE > 1:
        if USE_GPU:
            # BUGFIX: this previously set CUDA_VISIBLE_DEVICES to the device
            # *count* (e.g. "4"), which exposes only the single GPU with
            # that index. Expose devices 0..NUM_OF_DEVICE-1 instead.
            os.environ['CUDA_VISIBLE_DEVICES'] = ",".join(
                str(i) for i in range(NUM_OF_DEVICE))
            places = fluid.cuda_places()
        else:
            places = fluid.cpu_places(NUM_OF_DEVICE)
    else:
        if USE_GPU:
            places = fluid.cuda_places(0)
        else:
            places = fluid.cpu_places(1)
    return places
def cosine_decay(args):
    """Build a cosine-decay learning-rate schedule and a Momentum optimizer
    from parsed CLI arguments."""
    if args.use_gpu:
        device_list = fluid.cuda_places()
    else:
        device_list = fluid.cpu_places()
    # Steps per epoch = total images / global batch (per-card batch * cards).
    global_batch = args.batch_size * len(device_list)
    step = int(math.ceil(float(args.total_images) / global_batch))
    learning_rate = fluid.layers.cosine_decay(learning_rate=args.lr,
                                              step_each_epoch=step,
                                              epochs=args.num_epochs)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=learning_rate,
        momentum=args.momentum_rate,
        regularization=fluid.regularizer.L2Decay(args.l2_decay))
    return learning_rate, optimizer
def piecewise_decay(args):
    """Build a piecewise-decay learning-rate schedule and a Momentum
    optimizer from parsed CLI arguments."""
    if args.use_gpu:
        device_list = fluid.cuda_places()
    else:
        device_list = fluid.cpu_places()
    steps_per_epoch = int(
        math.ceil(
            float(args.total_images) / (args.batch_size * len(device_list))))
    # The LR drops by 10x at each configured epoch boundary.
    boundaries = [steps_per_epoch * epoch for epoch in args.step_epochs]
    values = [args.lr * (0.1**idx) for idx in range(len(boundaries) + 1)]
    learning_rate = fluid.layers.piecewise_decay(boundaries=boundaries,
                                                 values=values)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=learning_rate,
        momentum=args.momentum_rate,
        regularization=fluid.regularizer.L2Decay(args.l2_decay))
    return learning_rate, optimizer
def main(args):
    """Train the seq2seq model, save an inference model, then run one
    validation pass.

    Args:
        args: parsed CLI namespace with seq_num, batch_size, hidden_size,
            lr, use_cuda and epoch attributes.
    """
    # construct the sample
    input, output, data_size = construct_sample(args)

    # construct the train program
    train_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        seq2seq_model = SeqModel(args.seq_num, args.batch_size,
                                 args.hidden_size)
        ret_dict = seq2seq_model.build_graph()

    # Clone BEFORE the optimizer ops are added: this is the forward-only
    # program used for validation.
    val_program = train_program.clone()

    with fluid.program_guard(train_program, startup_program):
        optimizer = fluid.optimizer.Adam(args.lr)
        optimizer.minimize(ret_dict.loss)

    places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places()
    train_loader = fluid.io.DataLoader.from_generator(
        feed_list=ret_dict.feed_list, capacity=3, iterable=True)
    train_loader.set_batch_generator(train_reader(input, output, data_size,
                                                  args.batch_size),
                                     places=places)
    exe = Executor(places[0])
    exe.run(startup_program)

    # train stage:use data_loader as reader
    for _ in range(args.epoch):
        for data in train_loader():
            results = exe.run(train_program,
                              feed=data,
                              fetch_list=ret_dict.fetch_list)
            print("train process loss:{}".format(results[0]))

    # save the model for inferenceing
    with fluid.program_guard(train_program, startup_program):
        fluid.io.save_inference_model(dirname="./model",
                                      feeded_var_names=['feat', 'lod'],
                                      target_vars=[ret_dict.last_predict],
                                      executor=exe,
                                      export_for_deployment=True)

    # val stage: use data_loader as reader
    val_loader = fluid.io.DataLoader.from_generator(
        feed_list=ret_dict.feed_list, capacity=3, iterable=True)
    val_loader.set_batch_generator(val_reader(input, output, data_size,
                                              output.shape[0]),
                                   places=places)
    # BUGFIX: the validation loop used to iterate `train_loader` (without
    # even calling it) and run `train_program`, which both read the wrong
    # data and executed the optimizer ops (training on val data). Use the
    # val loader and the pre-minimize val_program instead.
    for _ in range(1):
        for data in val_loader():
            results = exe.run(val_program,
                              feed=data,
                              fetch_list=ret_dict.fetch_list)
            print("val process loss:{}".format(results[0]))
def prepare_places(self, with_data_parallel, with_cpu=True, with_gpu=True):
    """Collect place configurations for testing: single CPU, doubled CPU
    (when data-parallel), all GPUs (when data-parallel), and a single GPU."""
    configs = []
    if with_cpu:
        configs.append([fluid.CPUPlace()])
        if with_data_parallel:
            configs.append([fluid.CPUPlace()] * 2)

    if with_gpu and fluid.core.is_compiled_with_cuda():
        gpu_places = fluid.cuda_places()
        assert len(gpu_places) > 0, "no gpu detected"
        if with_data_parallel:
            configs.append(gpu_places)
        configs.append([gpu_places[0]])
    return configs
def start(self, places=None):
    """Create a PyReader over this dataset's generator and return its
    iterator.

    Args:
        places: fluid places to feed; defaults to all CUDA places.
            BUGFIX: the default used to be `places=F.cuda_places()`, which
            is evaluated once at import time — freezing the device list and
            failing on hosts without CUDA even when the caller passes
            explicit places. Resolve it lazily at call time instead.

    Returns:
        An iterable over the decorated PyReader.
    """
    if places is None:
        places = F.cuda_places()
    #assert self.pyreader is not None, 'use Dataset.features to build net first, then start dataset'

    def gen():
        # Log-and-reraise so generator failures surface in the logs instead
        # of being swallowed by the reader thread.
        try:
            for idx, i in enumerate(self.generator()):
                yield i
        except Exception as e:
            log.exception(e)
            raise e

    r = F.io.PyReader(
        feed_list=self.placeholders(), capacity=50, iterable=True)
    r.decorate_batch_generator(gen, places=places)
    return r()
def check_multi_card_fetch_var(self):
    """Fetch every variable under all (memory_optimize, enable_inplace)
    build combinations on multiple cards and assert all runs agree."""
    if self.is_invalid_test():
        return

    prog1, scope1, exe, loss1 = self.build_program_and_scope()
    scopes = []
    compiled_programs = []

    if self.use_cuda:
        places = fluid.cuda_places()
    else:
        places = fluid.cpu_places(self.device_count)

    # One compiled program + scope per build-strategy combination.
    for memory_optimize in [False, True]:
        for enable_inplace in [False, True]:
            prog, scope, _, loss = self.build_program_and_scope()
            scopes.append(scope)
            build_strategy = fluid.BuildStrategy()
            build_strategy.memory_optimize = memory_optimize
            build_strategy.enable_inplace = enable_inplace
            build_strategy.fuse_all_optimizer_ops = self.fuse_all_optimizer_ops
            compiled_program = fluid.CompiledProgram(
                prog).with_data_parallel(
                    loss_name=loss.name,
                    build_strategy=build_strategy,
                    places=places)
            compiled_programs.append(compiled_program)

    repeated_var_names = self.get_all_vars(prog1) * 2
    random.shuffle(repeated_var_names)  # add some random

    for fetch_var in repeated_var_names:
        for _ in range(4):
            fetch_vals = []
            for scope, compiled_prog in zip(scopes, compiled_programs):
                with fluid.scope_guard(scope):
                    # NOTE(review): `feed_dict` is resolved from enclosing
                    # scope — confirm it is defined at module/class level.
                    fetch_val, = exe.run(compiled_prog,
                                         feed=feed_dict,
                                         fetch_list=[fetch_var])
                    fetch_vals.append(fetch_val)

            for item in fetch_vals:
                # BUGFIX: a duplicated, message-less assertTrue used to run
                # first and fire before this one, hiding the diagnostic
                # message; keep only the informative assertion.
                self.assertTrue(
                    np.array_equal(fetch_vals[0], item),
                    "error var name: {}, fetch_vals[0]: {}, item: {}".
                    format(fetch_var,
                           fetch_vals[0][~np.equal(fetch_vals[0], item)],
                           item[~np.equal(fetch_vals[0], item)]))
def main(args):
    """Load the saved inference model and run it on one test sample."""
    input, output, data_size = construct_sample(args)
    if args.use_cuda:
        run_places = fluid.cuda_places()
    else:
        run_places = fluid.cpu_places()
    exe = Executor(run_places[0])

    # Restore the program plus its feed/fetch metadata saved by training.
    [inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(dirname="./model",
                                                    executor=exe)

    feat, lod = test_reader()
    result = exe.run(inference_program,
                     feed={
                         feed_target_names[0]: feat,
                         feed_target_names[1]: lod
                     },
                     fetch_list=fetch_targets)
    print(result[0].shape)
    output_final_result(result[0])
def __init__(self,
             learning_rate,
             momentum,
             parameter_list=None,
             regularization=None,
             config=None,
             **args):
    """Momentum optimizer wrapper.

    Args:
        learning_rate: base learning rate or LR schedule.
        momentum (float): momentum factor.
        parameter_list: parameters to optimize (optional).
        regularization: regularizer to apply (optional).
        config (dict|None): global config; reads 'multi_precision',
            'use_pure_fp16' and TRAIN.batch_size when present.
        **args: extra keyword arguments, accepted for signature
            compatibility and ignored.
    """
    super(Momentum, self).__init__()
    self.learning_rate = learning_rate
    self.momentum = momentum
    self.parameter_list = parameter_list
    self.regularization = regularization
    # BUGFIX: `config` defaults to None but was dereferenced
    # unconditionally, raising AttributeError when omitted; fall back to an
    # empty dict so the .get() defaults apply.
    config = config if config is not None else {}
    self.multi_precision = config.get('multi_precision', False)
    # Pure-fp16 training rescales gradients by the inverse per-device
    # batch size (device count / global batch size).
    self.rescale_grad = (
        1.0 / (config['TRAIN']['batch_size'] / len(fluid.cuda_places()))
        if config.get('use_pure_fp16', False) else 1.0)
def train(self, train_ds, train_hooks=[]):
    """Train the model over `train_ds` until the dataset or the configured
    step limit is exhausted.

    Args:
        train_ds (Dataset): training dataset.
        train_hooks (list): extra run hooks appended after the default
            stop/logging hooks.

    Returns:
        The MonitoredExecutor result.

    Raises:
        ValueError: if `train_ds` is not a `Dataset`.
    """
    if not isinstance(train_ds, Dataset):
        raise ValueError(
            'expect dataset to be instance of Dataset, got %s' %
            repr(train_ds))

    train_program, model_spec, summary_record = self.build_for_train(
        train_ds)
    train_run_hooks = [
        hooks.StopAtStepHook(self.run_config.max_steps,
                             self.run_config.run_steps),
        hooks.LoggingHook(model_spec.loss,
                          summary_record=summary_record,
                          summary_writer=get_summary_writer(
                              os.path.join(self.run_config.model_dir,
                                           'train_history')),
                          per_step=self.run_config.log_steps,
                          skip_step=self.run_config.skip_steps),
    ]
    # Caller-supplied hooks run after the defaults.
    train_run_hooks.extend(train_hooks)
    train_executor = F.Executor(F.cuda_places()[0])

    mon_exe = MonitoredExecutor(train_executor,
                                train_program,
                                loss=model_spec.loss,
                                run_config=self.run_config,
                                run_hooks=train_run_hooks,
                                warm_start_setting=self.warm_start_setting)

    # Only initializes the distributed-training environment.
    distribution.init_distribuition_env(train_program)
    mon_exe.init_or_restore_variables()
    # Only the master worker writes checkpoints.
    if distribution.status.is_master:
        mon_exe._hooks.append(
            hooks.CheckpointSaverHook(mon_exe._saver,
                                      per_step=mon_exe._save_steps,
                                      skip_step=mon_exe._skip_steps))

    try:
        with mon_exe:
            for data in train_ds.start():
                mon_exe.run(feed=data)
    except (StopException, F.core.EOFException) as e:
        # Normal termination: dataset exhausted or step limit reached.
        pass

    return mon_exe.result
def __init__(self, args):
    """Copy training hyper-parameters off the parsed CLI namespace.

    Args:
        args: argparse namespace with batch/LR/decay settings plus
            multi_precision and use_pure_fp16 flags.
    """
    # Batch / learning-rate schedule settings.
    self.batch_size = args.batch_size
    self.lr = args.lr
    self.lr_strategy = args.lr_strategy
    self.l2_decay = args.l2_decay
    self.momentum_rate = args.momentum_rate
    self.step_epochs = args.step_epochs
    self.num_epochs = args.num_epochs
    self.warm_up_epochs = args.warm_up_epochs
    self.decay_epochs = args.decay_epochs
    self.decay_rate = args.decay_rate
    self.total_images = args.total_images
    self.multi_precision = args.multi_precision
    # Pure-fp16 training rescales gradients by the inverse per-device
    # batch size; otherwise gradients are left unscaled.
    if args.use_pure_fp16:
        per_device_batch = args.batch_size / len(fluid.cuda_places())
        self.rescale_grad = 1.0 / per_device_batch
    else:
        self.rescale_grad = 1.0
    # Steps per epoch with respect to the global batch size.
    self.step = int(math.ceil(float(self.total_images) / self.batch_size))
def test_analysis_helper(self):
    """Exercise VarCollector on a MobileNet/MNIST training graph: collect
    abs-max statistics (EMA) for selected activations, then raw statistics
    for all persistable variables, and render the distribution PDF."""
    image = fluid.layers.data(name='image',
                              shape=[1, 28, 28],
                              dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    model = MobileNet()
    out = model.net(input=image, class_dim=10)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    optimizer = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=0.01,
        regularization=fluid.regularizer.L2Decay(4e-5))
    optimizer.minimize(avg_cost)
    main_prog = fluid.default_main_program()

    places = fluid.cuda_places() if fluid.is_compiled_with_cuda(
    ) else fluid.cpu_places()
    exe = fluid.Executor(places[0])
    train_reader = paddle.fluid.io.batch(paddle.dataset.mnist.train(),
                                         batch_size=64)
    train_loader = fluid.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=512,
        use_double_buffer=True,
        iterable=True)
    train_loader.set_sample_list_generator(train_reader, places)
    exe.run(fluid.default_startup_program())

    # First pass: EMA abs-max statistics over a fixed set of activations.
    vars = ['conv2d_0.tmp_0', 'fc_0.tmp_0', 'fc_0.tmp_1', 'fc_0.tmp_2']
    var_collector1 = VarCollector(main_prog, vars, use_ema=True)
    values = var_collector1.abs_max_run(train_loader,
                                        exe,
                                        step=None,
                                        loss_name=avg_cost.name)
    # Second pass: raw value collection over all persistable variables.
    vars = [v.name for v in main_prog.list_vars() if v.persistable]
    var_collector2 = VarCollector(main_prog, vars, use_ema=False)
    values = var_collector2.run(train_loader,
                                exe,
                                step=None,
                                loss_name=avg_cost.name)
    # Render the collected distributions to a PDF report.
    var_collector2.pdf(values)
def main(args):
    """Train the GES (graph embedding with side information) model, either
    on the built-in BlogCatalog dataset or on a user-provided edge list
    with node features loaded from an .npy file."""
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")
    if args.dataset is not None:
        if args.dataset == "BlogCatalog":
            graph = data_loader.BlogCatalogDataset().graph
        else:
            raise ValueError(args.dataset + " dataset doesn't exists")
        log.info("Load buildin BlogCatalog dataset done.")
        # Offset group ids by num_nodes so node ids and group-feature ids
        # occupy disjoint ranges in one embedding table.
        node_feat = np.expand_dims(graph.node_feat["group_id"].argmax(-1),
                                   -1) + graph.num_nodes
        args.num_nodes = graph.num_nodes
        args.num_embedding = graph.num_nodes + graph.node_feat[
            "group_id"].shape[-1]
    else:
        graph = build_graph(args.num_nodes, args.edge_path,
                            args.output_path)
        node_feat = np.load(args.node_feat_npy)

    # Build graph/model first: forward() must run before optimization()
    # adds the optimizer ops.
    model = GESModel(args.num_embedding, node_feat.shape[1] + 1,
                     args.hidden_size, args.neg_num, False, 2)
    pyreader = model.pyreader
    loss = model.forward()
    num_devices = len(F.cuda_places())
    # Steps shrink with device count; LR is scaled up to compensate.
    train_steps = int(args.num_nodes * args.epoch / args.batch_size /
                      num_devices)
    log.info("Train steps: %s" % train_steps)
    optimization(args.lr * num_devices, loss, train_steps, args.optimizer)

    place = F.CUDAPlace(0)
    exe = F.Executor(place)
    exe.run(F.default_startup_program())

    gen_func = build_gen_func(args, graph, node_feat)
    pyreader.decorate_tensor_provider(gen_func)
    pyreader.start()

    train_prog = F.default_main_program()
    train_exe = get_parallel_exe(train_prog, loss)
    train(train_exe, exe, train_prog, loss, pyreader, args, train_steps)
def run_network(self, iterable, use_cuda, drop_last):
    """Run a tiny fc network fed by a DataLoader and verify the number of
    batches actually delivered.

    Args:
        iterable (bool): use the iterable DataLoader API (loader()) vs the
            start()/reset() API.
        use_cuda (bool): all CUDA places vs 4 CPU places.
        drop_last (bool): drop the trailing incomplete multi-device batch.
    """
    x = fluid.data(shape=[None, 1], name='x', dtype='float32')
    places = fluid.cuda_places() if use_cuda else fluid.cpu_places(4)
    loader = fluid.io.DataLoader.from_generator(feed_list=[x],
                                                capacity=16,
                                                iterable=iterable,
                                                drop_last=drop_last)
    y = fluid.layers.fc(x, size=10)
    loss = fluid.layers.reduce_mean(y)
    exe = fluid.Executor(places[0])
    exe.run(fluid.default_startup_program())
    prog = fluid.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            places=places, loss_name=loss.name)
    # Non-iterable loaders bind places at construction; pass None then.
    loader.set_batch_generator(self.create_reader(),
                               places=places if iterable else None)
    for _ in six.moves.range(self.epoch_num):
        actual_batch_num = 0
        if loader.iterable:
            for feed_data in loader():
                x_data, = exe.run(prog, feed=feed_data, fetch_list=[x])
                # Each fetched batch must be a whole multiple of batch_size.
                self.assertEqual(x_data.shape[0] % self.batch_size, 0)
                self.assertTrue(x_data.shape[0] != 0)
                actual_batch_num += int(x_data.shape[0] / self.batch_size)
        else:
            loader.start()
            try:
                while True:
                    x_data, = exe.run(prog, fetch_list=[x])
                    self.assertEqual(x_data.shape[0] % self.batch_size, 0)
                    self.assertTrue(x_data.shape[0] != 0)
                    actual_batch_num += int(x_data.shape[0] /
                                            self.batch_size)
            except fluid.core.EOFException:
                # End of data for this epoch; rearm the loader.
                loader.reset()

        # With drop_last on multiple devices, a trailing partial
        # multi-device batch may be dropped, so fewer batches can arrive.
        if not drop_last or len(places) == 1:
            self.assertEqual(self.batch_num, actual_batch_num)
        else:
            self.assertGreater(self.batch_num, actual_batch_num)