def train_parallel_exe(args,
                       learning_rate,
                       batch_size,
                       num_passes,
                       init_model=None,
                       pretrained_model=None,
                       model_save_dir='model',
                       parallel=True,
                       use_nccl=True,
                       lr_strategy=None,
                       layers=50):
    """Train SE-ResNeXt/MobileNet on the flowers dataset with ParallelExecutor.

    Builds the network, the optimizer (piecewise/cosine/constant LR per
    ``lr_strategy``), trains for ``num_passes`` passes, evaluates after each
    pass, and on the final pass records accuracy/cost/speed KPIs.

    Args:
        args: parsed CLI args; ``args.model`` selects the network and
            ``args.with_mem_opt`` toggles memory optimization.
        learning_rate: base learning rate (used by cosine/constant strategies).
        batch_size: per-step batch size for both train and test readers.
        num_passes: number of full passes over the training data.
        init_model: optional directory of persistables to restore.
        pretrained_model: optional directory of pretrained vars to load
            (only vars whose files exist are loaded).
        model_save_dir, parallel, use_nccl: accepted for interface
            compatibility; not used by this variant.
        lr_strategy: dict selecting "piecewise_decay" or "cosine_decay";
            any other content falls back to a constant learning rate.
        layers: SE-ResNeXt depth (e.g. 50).
    """
    class_dim = 1000
    image_shape = [3, 224, 224]
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # BUG FIX: was `args.model is 'se_resnext'` — identity comparison against
    # a string literal is implementation-dependent; use equality.
    if args.model == 'se_resnext':
        out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
    else:
        out = mobile_net(img=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    avg_cost = fluid.layers.mean(x=cost)
    # Clone before the optimizer is added so the test program has no
    # backward/optimize ops.
    test_program = fluid.default_main_program().clone(for_test=True)
    if "piecewise_decay" in lr_strategy:
        bd = lr_strategy["piecewise_decay"]["bd"]
        lr = lr_strategy["piecewise_decay"]["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    elif "cosine_decay" in lr_strategy:
        step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"]
        epochs = lr_strategy["cosine_decay"]["epochs"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=cosine_decay(
                learning_rate=learning_rate,
                step_each_epoch=step_each_epoch,
                epochs=epochs),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    opts = optimizer.minimize(avg_cost)
    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    # BUG FIX: was `fluid.default_startup_program.random_seed = 1000`, which
    # set an attribute on the *function object* — the seed never reached the
    # program. Call the function to get the actual startup program.
    fluid.default_startup_program().random_seed = 1000
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    if pretrained_model:

        def if_exist(var):
            # Load only variables that have a matching file on disk.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(flowers.train(), batch_size=batch_size)
    test_reader = paddle.batch(flowers.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
    # Share parameters with the training executor for evaluation.
    test_exe = fluid.ParallelExecutor(
        use_cuda=True, main_program=test_program, share_vars_from=train_exe)

    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]

    train_speed = []
    for pass_id in range(num_passes):
        train_info = [[], [], []]
        test_info = [[], [], []]
        pass_time = 0
        pass_num = 0
        pass_speed = 0.0
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = train_exe.run(fetch_list,
                                             feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            pass_time += period
            pass_num += len(data)
            # Fetches come back per-device; average them into scalars.
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            train_info[0].append(loss)
            train_info[1].append(acc1)
            train_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, "
                      "acc1 {3}, acc5 {4} time {5}"
                      .format(pass_id, batch_id, loss, acc1, acc5,
                              "%2.2f sec" % period))
                sys.stdout.flush()
        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        pass_speed = pass_num / pass_time
        train_speed.append(pass_speed)
        if pass_id == num_passes - 1:
            train_acc_top1_kpi.add_record(train_acc1)
            train_acc_top5_kpi.add_record(train_acc5)
            train_cost_kpi.add_record(train_loss)
            # BUG FIX: was np.array(pass_speed).mean(), which averages only
            # the final pass's scalar; average the speeds of all passes.
            mean_pass_speed = np.array(train_speed).mean()
            train_speed_kpi.add_record(mean_pass_speed)
        # BUG FIX: the original test loop reused the stale `batch_id` from the
        # training loop, so the `% 10` logging condition was wrong.
        for batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = test_exe.run(fetch_list,
                                            feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            test_info[0].append(loss)
            test_info[1].append(acc1)
            test_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0},testbatch {1},loss {2}, "
                      "acc1 {3},acc5 {4},time {5}"
                      .format(pass_id, batch_id, loss, acc1, acc5,
                              "%2.2f sec" % period))
                sys.stdout.flush()
        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()
        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}, pass_time {7}, "
              "train_speed {8}"
              .format(pass_id, train_loss, train_acc1, train_acc5,
                      test_loss, test_acc1, test_acc5, pass_time,
                      pass_num / pass_time))
        sys.stdout.flush()
    train_acc_top1_kpi.persist()
    train_acc_top5_kpi.persist()
    train_cost_kpi.persist()
    train_speed_kpi.persist()
def parallel_do(self, train_inputs, test_inputs, seed):
    """Train/test SE-ResNeXt via the legacy ParallelDo multi-device operator.

    Builds a fresh program pair seeded with ``seed``, runs one SGD step per
    batch in ``train_inputs`` (dumping per-parameter L1 sums for debugging),
    then evaluates every batch in ``test_inputs``.

    Args:
        train_inputs: iterable of feedable training batches.
        test_inputs: iterable of feedable test batches.
        seed: random seed applied to the startup program.

    Returns:
        (losses, grads, test_losses): per-batch training losses, the fetched
        gradient of ``conv2d_0.w_0`` for each training batch, and per-batch
        test losses.
    """
    import collections  # hoisted out of the loop (was re-imported per batch)

    main = fluid.Program()
    startup = fluid.Program()
    startup.random_seed = seed
    with fluid.program_guard(main, startup):
        data = fluid.layers.data(
            name='image', shape=[3, 224, 224], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        devices_num = fluid.core.get_cuda_device_count()
        places = fluid.layers.get_places(devices_num)
        pd = fluid.layers.ParallelDo(places, use_nccl=True)
        with pd.do():
            im = pd.read_input(data)
            lb = pd.read_input(label)
            out = SE_ResNeXt(input=im, class_dim=102)
            loss = fluid.layers.cross_entropy(input=out, label=lb)
            loss = fluid.layers.mean(loss)
            pd.write_output(loss)

        loss = pd()
        # Average the per-device losses into a single scalar.
        avg_loss = fluid.layers.mean(loss)
        test_program = main.clone(for_test=True)

        opt = fluid.optimizer.SGD(learning_rate=0.1)
        opt.minimize(avg_loss, startup)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup)

    var = fluid.global_scope().find_var('conv2d_0.w_0').get_tensor()
    # Fetch the gradient of the first conv weight for numerical comparison.
    grad_var = fluid.framework.get_var('conv2d_0.w_0@GRAD')
    fetch_list = [avg_loss, grad_var]

    feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
    losses = []
    grads = []
    test_losses = []
    # NOTE: loop variable renamed from `data` to avoid shadowing the
    # `data` layer variable above.
    for batch in train_inputs:
        all_vars = main.global_block().vars
        all_parameters = collections.OrderedDict()
        # .items() instead of Python-2-only .iteritems() (works on 2 and 3).
        for k, v in all_vars.items():
            if v.persistable and 'velocity' not in k:
                all_parameters[k] = v
        print('Total vars: %d\n' % (len(all_parameters)))
        # Debug dump: L1 norm of every persistable (non-velocity) variable.
        for k, v in all_parameters.items():
            var = fluid.global_scope().find_var(k).get_tensor()
            print('!!%s: %f\n' % (k, np.sum(np.abs(np.array(var)))))
        ret = exe.run(main, feed=feeder.feed(batch), fetch_list=fetch_list)
        losses.append(ret[0][0])
        grads.append(ret[1])
    for test_data in test_inputs:
        test_loss = exe.run(
            test_program, feed=feeder.feed(test_data), fetch_list=[avg_loss])
        test_losses.append(test_loss[0][0])
    return losses, grads, test_losses
def exe(self, train_inputs, test_inputs, seed):
    """Single-device reference run of SE-ResNeXt (counterpart to parallel_do).

    Builds a fresh program pair seeded with ``seed``, trains with Momentum on
    one CUDA device over ``train_inputs`` (dumping per-parameter L1 sums for
    debugging), then evaluates every batch in ``test_inputs``.

    Args:
        train_inputs: iterable of feedable training batches.
        test_inputs: iterable of feedable test batches.
        seed: random seed applied to the startup program.

    Returns:
        (losses, grads, test_losses): per-batch training losses, the fetched
        gradient of ``conv2d_0.w_0`` for each training batch, and per-batch
        test losses.
    """
    import collections  # hoisted out of the loop (was re-imported per batch)

    main = fluid.Program()
    startup = fluid.Program()
    startup.random_seed = seed
    with fluid.program_guard(main, startup):
        data = fluid.layers.data(
            name='image', shape=[3, 224, 224], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        out = SE_ResNeXt(input=data, class_dim=102)
        loss = fluid.layers.cross_entropy(input=out, label=label)
        loss = fluid.layers.mean(loss)
        test_program = main.clone(for_test=True)
        opt = fluid.optimizer.Momentum(
            learning_rate=0.01,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
        opt.minimize(loss)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup)

    var = fluid.global_scope().find_var('conv2d_0.w_0').get_tensor()
    # Fetch the gradient of the first conv weight for numerical comparison.
    grad_var = fluid.framework.get_var('conv2d_0.w_0@GRAD')
    fetch_list = [loss, grad_var]

    feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
    losses = []
    grads = []
    test_losses = []
    # NOTE: loop variable renamed from `data` to avoid shadowing the
    # `data` layer variable above.
    for batch in train_inputs:
        all_vars = main.global_block().vars
        all_parameters = collections.OrderedDict()
        # .items() instead of Python-2-only .iteritems() (works on 2 and 3).
        for k, v in all_vars.items():
            if v.persistable and 'velocity' not in k:
                all_parameters[k] = v
        print('Total vars: %d\n' % (len(all_parameters)))
        # Debug dump: L1 norm of every persistable (non-velocity) variable.
        for k, v in all_parameters.items():
            var = fluid.global_scope().find_var(k).get_tensor()
            print('!!%s: %f\n' % (k, np.sum(np.abs(np.array(var)))))
        ret = exe.run(main, feed=feeder.feed(batch), fetch_list=fetch_list)
        loss_v = np.array(ret[0])
        losses.append(loss_v[0])
        grads.append(np.array(ret[1]))
    for test_data in test_inputs:
        test_loss = exe.run(
            test_program, feed=feeder.feed(test_data), fetch_list=[loss])
        test_losses.append(test_loss[0][0])
    return losses, grads, test_losses
def train_parallel_do(args,
                      learning_rate,
                      batch_size,
                      num_passes,
                      init_model=None,
                      pretrained_model=None,
                      model_save_dir='model',
                      parallel=True,
                      use_nccl=True,
                      lr_strategy=None,
                      layers=50):
    """Train SE-ResNeXt/MobileNet using the legacy ParallelDo operator.

    Builds the network (replicated across devices when ``parallel`` is True),
    the optimizer (piecewise/cosine/constant LR per ``lr_strategy``), trains
    for ``num_passes`` passes with a plain Executor, evaluates after each
    pass, and saves persistables per pass.

    Args:
        args: parsed CLI args; ``args.model`` selects the network and
            ``args.with_mem_opt`` toggles memory optimization.
        learning_rate: base learning rate (used by cosine/constant strategies).
        batch_size: per-step batch size for both train and test readers.
        num_passes: number of full passes over the training data.
        init_model: optional directory of persistables to restore.
        pretrained_model: optional directory of pretrained vars to load
            (only vars whose files exist are loaded).
        model_save_dir: root directory for per-pass checkpoints.
        parallel: replicate across all visible devices via ParallelDo.
        use_nccl: use NCCL for ParallelDo cross-device communication.
        lr_strategy: dict selecting "piecewise_decay" or "cosine_decay";
            any other content falls back to a constant learning rate.
        layers: SE-ResNeXt depth (e.g. 50).
    """
    class_dim = 1000
    image_shape = [3, 224, 224]
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    if parallel:
        places = fluid.layers.device.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)

        with pd.do():
            image_ = pd.read_input(image)
            label_ = pd.read_input(label)
            # BUG FIX: was `args.model is 'se_resnext'` — identity comparison
            # against a string literal is implementation-dependent.
            if args.model == 'se_resnext':
                out = SE_ResNeXt(
                    input=image_, class_dim=class_dim, layers=layers)
            else:
                out = mobile_net(img=image_, class_dim=class_dim)
            cost = fluid.layers.cross_entropy(input=out, label=label_)
            avg_cost = fluid.layers.mean(x=cost)
            acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label_, k=5)
            pd.write_output(avg_cost)
            pd.write_output(acc_top1)
            pd.write_output(acc_top5)

        avg_cost, acc_top1, acc_top5 = pd()
        # Average the per-device metrics into scalars.
        avg_cost = fluid.layers.mean(x=avg_cost)
        acc_top1 = fluid.layers.mean(x=acc_top1)
        acc_top5 = fluid.layers.mean(x=acc_top5)
    else:
        # BUG FIX: `==` instead of `is` (see note above).
        if args.model == 'se_resnext':
            out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
        else:
            out = mobile_net(img=image, class_dim=class_dim)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    # Clone before the optimizer is added so the inference program has no
    # backward/optimize ops.
    inference_program = fluid.default_main_program().clone(for_test=True)

    if "piecewise_decay" in lr_strategy:
        bd = lr_strategy["piecewise_decay"]["bd"]
        lr = lr_strategy["piecewise_decay"]["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    elif "cosine_decay" in lr_strategy:
        step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"]
        epochs = lr_strategy["cosine_decay"]["epochs"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=cosine_decay(
                learning_rate=learning_rate,
                step_each_epoch=step_each_epoch,
                epochs=epochs),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))

    opts = optimizer.minimize(avg_cost)
    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    if pretrained_model:

        def if_exist(var):
            # Load only variables that have a matching file on disk.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    for pass_id in range(num_passes):
        train_info = [[], [], []]
        test_info = [[], [], []]
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            train_info[0].append(loss[0])
            train_info[1].append(acc1[0])
            train_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, "
                      "acc1 {3}, acc5 {4} time {5}"
                      .format(pass_id, batch_id, loss[0], acc1[0], acc5[0],
                              "%2.2f sec" % period))
                sys.stdout.flush()
        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        for data in test_reader():
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                inference_program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            test_info[0].append(loss[0])
            test_info[1].append(acc1[0])
            test_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0},testbatch {1},loss {2}, "
                      "acc1 {3},acc5 {4},time {5}"
                      .format(pass_id, batch_id, loss[0], acc1[0], acc5[0],
                              "%2.2f sec" % period))
                sys.stdout.flush()
        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()
        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}"
              .format(pass_id, train_loss, train_acc1, train_acc5,
                      test_loss, test_acc1, test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir + '/' + args.model,
                                  str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)
def train_parallel_exe(args,
                       learning_rate,
                       batch_size,
                       num_passes,
                       init_model=None,
                       model_save_dir='model',
                       parallel=True,
                       use_nccl=True,
                       lr_strategy=None,
                       layers=50):
    """Train SE-ResNeXt with ParallelExecutor; evaluate with a plain Executor.

    Builds the network and optimizer (piecewise/cosine/constant LR per
    ``lr_strategy``), trains for ``num_passes`` passes, evaluates after each
    pass on the cloned test program, and saves persistables per pass.

    Args:
        args: parsed CLI args (``args.model`` is not consulted here — this
            variant always builds SE-ResNeXt).
        learning_rate: base learning rate (used by cosine/constant strategies).
        batch_size: per-step training batch size (test batch size is fixed
            at 32).
        num_passes: number of full passes over the training data.
        init_model: optional directory of persistables to restore.
        model_save_dir: root directory for per-pass checkpoints.
        parallel, use_nccl: accepted for interface compatibility; not used.
        lr_strategy: dict selecting "piecewise_decay" or "cosine_decay";
            any other content falls back to a constant learning rate.
        layers: SE-ResNeXt depth (e.g. 50).
    """
    class_dim = 1000
    image_shape = [3, 224, 224]
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    # Clone before the optimizer is added so the test program has no
    # backward/optimize ops.
    test_program = fluid.default_main_program().clone(for_test=True)

    if "piecewise_decay" in lr_strategy:
        bd = lr_strategy["piecewise_decay"]["bd"]
        lr = lr_strategy["piecewise_decay"]["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    elif "cosine_decay" in lr_strategy:
        print('cosine_decay')
        step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"]
        epochs = lr_strategy["cosine_decay"]["epochs"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=cosine_decay(
                learning_rate=learning_rate,
                step_each_epoch=step_each_epoch,
                epochs=epochs),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    opts = optimizer.minimize(avg_cost)
    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
    test_reader = paddle.batch(reader.test(), batch_size=32)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)

    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]

    for pass_id in range(num_passes):
        train_info = [[], [], []]
        test_info = [[], [], []]
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = train_exe.run(fetch_list,
                                             feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            # Fetches come back per-device; average them into scalars.
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            train_info[0].append(loss)
            train_info[1].append(acc1)
            train_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, "
                      "acc1 {3}, acc5 {4} time {5}"
                      .format(pass_id, batch_id, loss, acc1, acc5,
                              "%2.2f sec" % period))
                sys.stdout.flush()
        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        for batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                test_program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            test_info[0].append(loss[0])
            test_info[1].append(acc1[0])
            test_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                # BUG FIX: the original printed the raw fetched arrays; print
                # the scalar elements, consistent with what is accumulated
                # and with the training loop's output.
                print("Pass {0},testbatch {1},loss {2}, "
                      "acc1 {3},acc5 {4},time {5}"
                      .format(pass_id, batch_id, loss[0], acc1[0], acc5[0],
                              "%2.2f sec" % period))
                sys.stdout.flush()
        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()
        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}"
              .format(pass_id, train_loss, train_acc1, train_acc5,
                      test_loss, test_acc1, test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir, str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)