def __init__(self, train_batch_size=40, samples_each_class=2):
    self.samples_each_class = samples_each_class
    self.train_batch_size = train_batch_size
    num_gpus = get_gpu_num()
    assert train_batch_size % num_gpus == 0
    self.cal_loss_batch_size = train_batch_size // num_gpus
    assert self.cal_loss_batch_size % samples_each_class == 0
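# NOTE: get_gpu_num() above is defined elsewhere in the repo. A minimal
# sketch (not the repo's actual implementation), assuming it simply counts
# the cards exposed through CUDA_VISIBLE_DEVICES and falls back to a single
# card when the variable is unset, could look like this:
import os


def get_gpu_num_sketch():
    # Count visible GPU cards from CUDA_VISIBLE_DEVICES; assume one card
    # when the variable is unset or empty.
    visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    if visible.strip():
        return len(visible.split(','))
    return 1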
def __init__(self, train_batch_size=160, samples_each_class=2, reg_lambda=0.01):
    self.samples_each_class = samples_each_class
    assert self.samples_each_class == 2
    self.train_batch_size = train_batch_size
    num_gpus = get_gpu_num()
    assert train_batch_size % num_gpus == 0
    self.cal_loss_batch_size = train_batch_size // num_gpus
    assert self.cal_loss_batch_size % samples_each_class == 0
    self.reg_lambda = reg_lambda
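# As a quick sanity check of the divisibility asserts above (hypothetical
# numbers, assuming a machine with 4 visible GPUs):
train_batch_size = 160
num_gpus = 4                                         # e.g. what get_gpu_num() would report
assert train_batch_size % num_gpus == 0              # 160 splits evenly across cards
cal_loss_batch_size = train_batch_size // num_gpus   # 40 samples per card
assert cal_loss_batch_size % 2 == 0                  # samples_each_class == 2: pairs stay whole per card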
def train_async(args):
    # parameters from arguments
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    train_loader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        main_prog=train_prog, startup_prog=startup_prog, args=args)

    train_fetch_list = [
        global_lr.name, train_cost.name, train_acc1.name, train_acc5.name
    ]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    if num_trainers <= 1 and args.use_gpu:
        places = fluid.framework.cuda_places()
    else:
        places = place

    exe.run(startup_prog)
    logging.debug('after run startup program')

    # Resume from a checkpoint, or load pretrained weights.
    if checkpoint is not None:
        fluid.load(program=train_prog, model_path=checkpoint, executor=exe)
    if pretrained_model:
        load_pretrain(train_prog, pretrained_model)

    if args.use_gpu:
        devicenum = get_gpu_num()
    else:
        devicenum = 1
    assert (args.train_batch_size % devicenum) == 0
    # The loader takes the per-card batch size, so divide by the device count.
    train_batch_size = args.train_batch_size // devicenum

    train_loader.set_sample_generator(
        reader.train(args),
        batch_size=train_batch_size,
        drop_last=True,
        places=places)

    train_exe = fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=args.use_gpu,
        loss_name=train_cost.name)

    totalruntime = 0
    iter_no = 0
    train_info = [0, 0, 0, 0]
    while iter_no <= args.total_iter_num:
        for train_batch in train_loader():
            t1 = time.time()
            lr, loss, acc1, acc5 = train_exe.run(
                feed=train_batch, fetch_list=train_fetch_list)
            t2 = time.time()
            period = t2 - t1
            lr = np.mean(np.array(lr))
            train_info[0] += np.mean(np.array(loss))
            train_info[1] += np.mean(np.array(acc1))
            train_info[2] += np.mean(np.array(acc5))
            train_info[3] += 1

            # Report metrics averaged over the display window, then reset.
            if iter_no % args.display_iter_step == 0:
                avgruntime = totalruntime / args.display_iter_step
                avg_loss = train_info[0] / train_info[3]
                avg_acc1 = train_info[1] / train_info[3]
                avg_acc5 = train_info[2] / train_info[3]
                print("[%s] trainbatch %d, lr %.6f, loss %.6f, "
                      "acc1 %.4f, acc5 %.4f, time %2.2f sec" %
                      (fmt_time(), iter_no, lr, avg_loss, avg_acc1, avg_acc5,
                       avgruntime))
                sys.stdout.flush()
                totalruntime = 0
            if iter_no % args.display_iter_step == 0:
                train_info = [0, 0, 0, 0]

            totalruntime += period

            if iter_no % args.save_iter_step == 0 and iter_no != 0:
                model_path = os.path.join(model_save_dir + '/' + model_name,
                                          str(iter_no))
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                fluid.save(program=train_prog, model_path=model_path)

            iter_no += 1
def train_async(args):
    # parameters from arguments
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    train_loader, train_cost, global_lr, train_feas, train_label = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_loader, test_feas = build_program(
        is_train=False,
        main_prog=tmp_prog,
        startup_prog=startup_prog,
        args=args)
    test_prog = tmp_prog.clone(for_test=True)

    train_fetch_list = [
        global_lr.name, train_cost.name, train_feas.name, train_label.name
    ]
    test_fetch_list = [test_feas.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    if num_trainers <= 1 and args.use_gpu:
        places = fluid.framework.cuda_places()
    else:
        places = place

    exe.run(startup_prog)

    if checkpoint is not None:
        fluid.load(program=train_prog, model_path=checkpoint, executor=exe)
    if pretrained_model:
        load_params(exe, train_prog, pretrained_model)

    if args.use_gpu:
        devicenum = get_gpu_num()
    else:
        devicenum = int(os.environ.get('CPU_NUM', 1))
    assert (args.train_batch_size % devicenum) == 0
    # Use integer division so the per-card batch size stays an int.
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size

    train_loader.set_sample_generator(
        reader.train(args),
        batch_size=train_batch_size,
        drop_last=True,
        places=places)
    test_loader.set_sample_generator(
        reader.test(args),
        batch_size=test_batch_size,
        drop_last=False,
        places=place)

    train_exe = fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=args.use_gpu,
        loss_name=train_cost.name)

    totalruntime = 0
    iter_no = 0
    train_info = [0, 0, 0]
    while iter_no <= args.total_iter_num:
        for train_batch in train_loader():
            t1 = time.time()
            lr, loss, feas, label = train_exe.run(
                feed=train_batch, fetch_list=train_fetch_list)
            t2 = time.time()
            period = t2 - t1
            lr = np.mean(np.array(lr))
            train_info[0] += np.mean(np.array(loss))
            train_info[1] += recall_topk(feas, label, k=1)
            train_info[2] += 1

            if iter_no % args.display_iter_step == 0:
                avgruntime = totalruntime / args.display_iter_step
                avg_loss = train_info[0] / train_info[2]
                avg_recall = train_info[1] / train_info[2]
                print("[%s] trainbatch %d, lr %.6f, loss %.6f, "
                      "recall %.4f, time %2.2f sec" %
                      (fmt_time(), iter_no, lr, avg_loss, avg_recall,
                       avgruntime))
                sys.stdout.flush()
                totalruntime = 0
            if iter_no % 1000 == 0:
                train_info = [0, 0, 0]

            totalruntime += period

            if iter_no % args.test_iter_step == 0 and iter_no != 0:
                f, l = [], []
                for batch_id, test_batch in enumerate(test_loader()):
                    t1 = time.time()
                    [feas] = exe.run(
                        test_prog, feed=test_batch, fetch_list=test_fetch_list)
                    label = np.asarray(test_batch[0]['label'])
                    label = np.squeeze(label)
                    f.append(feas)
                    l.append(label)
                    t2 = time.time()
                    period = t2 - t1
                    if batch_id % 20 == 0:
                        print("[%s] testbatch %d, time %2.2f sec" %
                              (fmt_time(), batch_id, period))
                f = np.vstack(f)
                l = np.hstack(l)
                recall = recall_topk(f, l, k=1)
                print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" %
                      (fmt_time(), len(f), iter_no, recall))
                sys.stdout.flush()

            if iter_no % args.save_iter_step == 0 and iter_no != 0:
                model_path = os.path.join(model_save_dir, model_name,
                                          str(iter_no))
                fluid.save(program=train_prog, model_path=model_path)

            iter_no += 1
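# NOTE: recall_topk() above is imported from elsewhere in the repo. As an
# illustration of the metric it computes, here is a minimal NumPy sketch of
# Recall@k under the usual definition: each embedding queries all others
# (self excluded, cosine similarity on L2-normalized features) and counts as
# correct when one of its k nearest neighbors shares its label. This is a
# simplified stand-in, not the repo's actual implementation.
import numpy as np


def recall_topk_sketch(fea, lab, k=1):
    fea = np.asarray(fea, dtype=np.float64)
    lab = np.asarray(lab)
    # L2-normalize rows so the dot product is cosine similarity.
    fea = fea / np.linalg.norm(fea, axis=1, keepdims=True)
    sim = fea.dot(fea.T)
    np.fill_diagonal(sim, -np.inf)  # exclude each query itself
    hits = 0
    for i in range(len(fea)):
        nn = np.argsort(-sim[i])[:k]  # indices of the k most similar samples
        hits += int(np.any(lab[nn] == lab[i]))
    return hits / float(len(fea))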
def train_async(args):
    # parameters from arguments
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    if args.enable_ce:
        assert args.model == "ResNet50"
        assert args.loss_name == "arcmargin"
        np.random.seed(0)
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
        tmp_prog.random_seed = 1000

    train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_feas, image, label = build_program(
        is_train=False,
        main_prog=tmp_prog,
        startup_prog=startup_prog,
        args=args)
    test_prog = tmp_prog.clone(for_test=True)

    train_fetch_list = [
        global_lr.name, train_cost.name, train_acc1.name, train_acc5.name
    ]
    test_fetch_list = [test_feas.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    logging.debug('after run startup program')

    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)

    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    if args.use_gpu:
        devicenum = get_gpu_num()
    else:
        devicenum = int(os.environ.get('CPU_NUM', 1))
    assert (args.train_batch_size % devicenum) == 0
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size

    train_reader = paddle.batch(
        reader.train(args), batch_size=train_batch_size, drop_last=True)
    test_reader = paddle.batch(
        reader.test(args), batch_size=test_batch_size, drop_last=False)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    train_py_reader.decorate_paddle_reader(train_reader)

    train_exe = fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=args.use_gpu,
        loss_name=train_cost.name)

    totalruntime = 0
    train_py_reader.start()
    iter_no = 0
    train_info = [0, 0, 0, 0]
    while iter_no <= args.total_iter_num:
        t1 = time.time()
        lr, loss, acc1, acc5 = train_exe.run(fetch_list=train_fetch_list)
        t2 = time.time()
        period = t2 - t1
        lr = np.mean(np.array(lr))
        train_info[0] += np.mean(np.array(loss))
        train_info[1] += np.mean(np.array(acc1))
        train_info[2] += np.mean(np.array(acc5))
        train_info[3] += 1

        if iter_no % args.display_iter_step == 0:
            avgruntime = totalruntime / args.display_iter_step
            avg_loss = train_info[0] / train_info[3]
            avg_acc1 = train_info[1] / train_info[3]
            avg_acc5 = train_info[2] / train_info[3]
            print("[%s] trainbatch %d, lr %.6f, loss %.6f, "
                  "acc1 %.4f, acc5 %.4f, time %2.2f sec" %
                  (fmt_time(), iter_no, lr, avg_loss, avg_acc1, avg_acc5,
                   avgruntime))
            sys.stdout.flush()
            totalruntime = 0
        if iter_no % 1000 == 0:
            train_info = [0, 0, 0, 0]

        totalruntime += period

        if iter_no % args.test_iter_step == 0 and iter_no != 0:
            f, l = [], []
            for batch_id, data in enumerate(test_reader()):
                t1 = time.time()
                [feas] = exe.run(test_prog,
                                 fetch_list=test_fetch_list,
                                 feed=test_feeder.feed(data))
                label = np.asarray([x[1] for x in data])
                f.append(feas)
                l.append(label)
                t2 = time.time()
                period = t2 - t1
                if batch_id % 20 == 0:
                    print("[%s] testbatch %d, time %2.2f sec" %
                          (fmt_time(), batch_id, period))
            f = np.vstack(f)
            l = np.hstack(l)
            recall = recall_topk(f, l, k=1)
            print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" %
                  (fmt_time(), len(f), iter_no, recall))
            sys.stdout.flush()

        if iter_no % args.save_iter_step == 0 and iter_no != 0:
            model_path = os.path.join(model_save_dir + '/' + model_name,
                                      str(iter_no))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            fluid.io.save_persistables(exe, model_path, main_program=train_prog)

        iter_no += 1

    # This is for continuous evaluation only
    if args.enable_ce:
        # Use the mean cost/acc for training
        print("kpis\ttrain_cost\t{}".format(avg_loss))
        print("kpis\ttest_recall\t{}".format(recall))
def train_async(args):
    # parameters from arguments
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    # For CE testing only: fix the random seeds.
    if args.enable_ce:
        assert args.model == "ResNet50"
        assert args.loss_name == "arcmargin"
        np.random.seed(0)
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
        tmp_prog.random_seed = 1000

    trainclassify = args.loss_name in ["softmax", "arcmargin"]
    train_py_reader, outputvars = build_program(
        is_train=True,
        net_config=net_config_classify,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    if trainclassify:
        train_cost, train_acc1, train_acc5, global_lr = outputvars
        train_fetch_list = [
            global_lr.name, train_cost.name, train_acc1.name, train_acc5.name
        ]
        evaltrain = EvalTrain_Classify()
    else:
        train_cost, train_feas, train_label, global_lr = outputvars
        train_fetch_list = [
            global_lr.name, train_cost.name, train_feas.name, train_label.name
        ]
        evaltrain = EvalTrain_Metric()

    _, outputvars = build_program(
        is_train=False,
        net_config=net_config_test,
        main_prog=tmp_prog,
        startup_prog=startup_prog,
        args=args)
    test_feas, image, label = outputvars
    test_prog = tmp_prog.clone(for_test=True)
    test_fetch_list = [test_feas.name]

    # Enable memory optimization to save GPU memory. Note: variables to be
    # fetched must be listed in skip_opt_set, otherwise the pass may
    # overwrite them.
    if args.with_mem_opt:
        fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # Initialize variables.
    exe.run(startup_prog)
    logging.debug('after run startup program')

    # Resume from a checkpoint.
    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)

    # Load pretrained parameters into the network. When using a pretrained
    # model, either rename the last fc layer or delete the weight files
    # corresponding to the pretrained model's fc layer.
    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    # Get the number of GPU cards (or CPU cores) on this machine.
    if args.use_gpu:
        devicenum = get_gpu_num()
        assert (args.train_batch_size % devicenum) == 0
    else:
        devicenum = get_cpu_num()
        assert (args.train_batch_size % devicenum) == 0

    # Note: with py_reader the input batch size is the per-card batch size,
    # so divide the global batch size by the device count.
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size
    logging.debug('device number is %d, batch on each card:%d', devicenum,
                  train_batch_size)

    # Create a batched train_reader from the sample reader, and connect it to
    # the py_reader so that a thread created by the py_reader reads the data,
    # rather than the main thread.
    train_reader = paddle.batch(
        reader.train(args), batch_size=train_batch_size, drop_last=True)
    test_reader = paddle.batch(
        reader.test(args), batch_size=test_batch_size, drop_last=False)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    train_py_reader.decorate_paddle_reader(train_reader)

    # Use ParallelExecutor for multi-card training.
    train_exe = fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=args.use_gpu,
        loss_name=train_cost.name)

    totalruntime = 0
    # Start the py_reader's reading thread.
    train_py_reader.start()
    iter_no = 0
    while iter_no <= args.total_iter_num:
        t1 = time.time()
        # Note: with asynchronous py_reader reading, no feed argument is
        # passed to run().
        outputlist = train_exe.run(fetch_list=train_fetch_list)
        t2 = time.time()
        period = t2 - t1
        evaltrain.pushdata(outputlist)

        # Compute the average accuracy over multiple batches.
        if iter_no % args.display_iter_step == 0:
            avgruntime = totalruntime / args.display_iter_step
            train_accuracy = evaltrain.getaccuracy()
            print("[%s] trainbatch %d, "
                  "accuracy[%s], time %2.2f sec" %
                  (fmt_time(), iter_no, train_accuracy, avgruntime))
            sys.stdout.flush()
            totalruntime = 0
        if iter_no % 1000 == 0:
            evaltrain.reset()

        totalruntime += period

        if iter_no % args.test_iter_step == 0 and (pretrained_model or
                                                   checkpoint or
                                                   iter_no != 0):
            # Collect the features and labels of multiple test batches.
            evaltest = EvalTest()
            max_test_count = 100
            for batch_id, data in enumerate(test_reader()):
                t1 = time.time()
                test_outputlist = exe.run(test_prog,
                                          fetch_list=test_fetch_list,
                                          feed=test_feeder.feed(data))
                label = np.asarray([x[1] for x in data])
                evaltest.pushdata((test_outputlist[0], label))
                t2 = time.time()
                period = t2 - t1
                if batch_id % 20 == 0:
                    print("[%s] testbatch %d, time %2.2f sec" %
                          (fmt_time(), batch_id, period))
                if batch_id > max_test_count:
                    break
            # Evaluate retrieval accuracy: a retrieval is correct when the
            # query and the retrieved result share the same class. (The
            # test-set classes do not overlap with the training-set classes,
            # so the network's class outputs are meaningless here.)
            test_recall = evaltest.getaccuracy()
            print("[%s] test_img_num %d, trainbatch %d, test_recall %s" %
                  (fmt_time(), max_test_count * args.test_batch_size, iter_no,
                   test_recall))
            sys.stdout.flush()

        if iter_no % args.save_iter_step == 0 and iter_no != 0:
            model_path = os.path.join(model_save_dir + '/' + model_name,
                                      str(iter_no))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            # Save the model; it can be used to resume training from a
            # checkpoint.
            fluid.io.save_persistables(exe, model_path, main_program=train_prog)

        iter_no += 1

    # This is for continuous evaluation only
    if args.enable_ce:
        # Report the last logged training and test metrics.
        print("kpis train_cost %s" % (train_accuracy))
        print("kpis test_recall %s" % (test_recall))
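# NOTE: the EvalTrain_* and EvalTest accumulator classes used above are
# defined elsewhere in the repo; only their pushdata()/getaccuracy()/reset()
# interface is visible here. As an illustration, a minimal EvalTest sketch
# (hypothetical, not the repo's actual class) that stacks the pushed
# (features, labels) pairs and scores Recall@1 over the whole set, using the
# recall_topk_sketch defined earlier in place of the repo's recall_topk:
import numpy as np


class EvalTestSketch(object):
    def __init__(self):
        self.feas = []
        self.labels = []

    def pushdata(self, data):
        # Each push carries one batch: (feature matrix, label vector).
        feas, label = data
        self.feas.append(feas)
        self.labels.append(label)

    def getaccuracy(self):
        # Stack all collected batches and score Recall@1; return a string,
        # matching how test_recall is formatted with %s above.
        f = np.vstack(self.feas)
        l = np.hstack(self.labels)
        return "%.5f" % recall_topk_sketch(f, l, k=1)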