def main(args):
    """Train a portrait segmentation model (PortraitNet / ENet / BiSeNet) in PyTorch.

    Loads the yaml config named by ``args.config_path``, builds the train/test
    datasets and the selected network, optionally initializes from a pretrained
    backbone and/or resumes from ``model_best.pth.tar``, then runs the
    train/test loop for up to 2000 epochs, checkpointing the best test loss.
    """
    cudnn.benchmark = True
    assert args.model in ['PortraitNet', 'ENet', 'BiSeNet'], \
        'Error!, <model> should in [PortraitNet, ENet, BiSeNet]'

    config_path = args.config_path
    print('===========> loading config <============')
    print("config path: ", config_path)
    with open(config_path, 'rb') as f:
        cont = f.read()
    # NOTE(review): `load` appears to be yaml.load — prefer yaml.safe_load for
    # config files that may come from untrusted sources.
    cf = load(cont)

    print('===========> loading data <===========')
    exp_args = edict()
    exp_args.istrain = cf['istrain']  # set the mode
    exp_args.task = cf['task']  # only support 'seg' now
    exp_args.datasetlist = cf['datasetlist']
    # paths come from the command line here, not from the config file
    exp_args.model_root = args.model_root
    exp_args.data_root = args.data_root
    exp_args.file_root = args.file_root

    # set log path (any previous logs are wiped)
    logs_path = os.path.join(exp_args.model_root, 'log/')
    if os.path.exists(logs_path):
        shutil.rmtree(logs_path)
    logger_train = Logger(logs_path + 'train')
    logger_test = Logger(logs_path + 'test')

    # the height of input images, default=224
    exp_args.input_height = cf['input_height']
    # the width of input images, default=224
    exp_args.input_width = cf['input_width']
    # if video==True, add prior channel for input images, default=False
    exp_args.video = cf['video']
    # the probability to set empty prior channel, default=0.5
    exp_args.prior_prob = cf['prior_prob']
    # whether to add boundary auxiliary loss, default=False
    exp_args.addEdge = cf['addEdge']
    # the weight of boundary auxiliary loss, default=0.1
    exp_args.edgeRatio = cf['edgeRatio']
    # whether to add consistency constraint loss, default=False
    exp_args.stability = cf['stability']
    # whether to use KL loss in consistency constraint loss, default=True
    exp_args.use_kl = cf['use_kl']
    # temperature in consistency constraint loss, default=1
    exp_args.temperature = cf['temperature']
    # the weight of consistency constraint loss, default=2
    exp_args.alpha = cf['alpha']
    # input normalization parameters
    exp_args.padding_color = cf['padding_color']
    exp_args.img_scale = cf['img_scale']
    # BGR order, image mean, default=[103.94, 116.78, 123.68]
    exp_args.img_mean = cf['img_mean']
    # BGR order, image val, default=[1/0.017, 1/0.017, 1/0.017]
    exp_args.img_val = cf['img_val']
    # whether to use pretrained model to init portraitnet
    exp_args.init = cf['init']
    # whether to continue training
    exp_args.resume = cf['resume']
    # if useUpsample==True, use nn.Upsample in decoder, else nn.ConvTranspose2d
    exp_args.useUpsample = cf['useUpsample']
    # if useDeconvGroup==True, set groups=input_channel in nn.ConvTranspose2d
    exp_args.useDeconvGroup = cf['useDeconvGroup']

    # tool modes: print a summary or export the model, then exit
    if args.summary:
        model_summary(args, exp_args)
        return
    if args.export_onnx:
        print('Exporting model to onnx format')
        export_to_onnx(args, exp_args)
        print('Done')
        return
    if args.export_coreml:
        print('Exporting model to coreml format')
        export_to_coreml(args, exp_args)
        print('Done')
        return

    # set training dataset
    exp_args.istrain = True
    dataset_train = Human(exp_args)
    print("image number in training: ", len(dataset_train))
    dataLoader_train = torch.utils.data.DataLoader(dataset_train,
                                                   batch_size=args.batchsize,
                                                   shuffle=True,
                                                   num_workers=args.workers)

    # set testing dataset
    exp_args.istrain = False
    dataset_test = Human(exp_args)
    print("image number in testing: ", len(dataset_test))
    dataLoader_test = torch.utils.data.DataLoader(dataset_test,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=args.workers)
    exp_args.istrain = True
    print("finish load dataset ...")

    print('===========> loading model <===========')
    if args.model == 'PortraitNet':
        # train our model: portraitnet
        import model_mobilenetv2_seg_small as modellib
        netmodel = modellib.MobileNetV2(n_class=2,
                                        useUpsample=exp_args.useUpsample,
                                        useDeconvGroup=exp_args.useDeconvGroup,
                                        addEdge=exp_args.addEdge,
                                        channelRatio=1.0,
                                        minChannel=16,
                                        weightInit=True,
                                        video=exp_args.video).cuda()
        print("finish load PortraitNet ...")
    elif args.model == 'BiSeNet':
        # train BiSeNet
        import model_BiSeNet as modellib
        netmodel = modellib.BiSeNet(n_class=2,
                                    useUpsample=exp_args.useUpsample,
                                    useDeconvGroup=exp_args.useDeconvGroup,
                                    addEdge=exp_args.addEdge).cuda()
        print("finish load BiSeNet ...")
    elif args.model == 'ENet':
        # train ENet
        import model_enet as modellib
        netmodel = modellib.ENet(n_class=2).cuda()
        print("finish load ENet ...")

    params, multiple = get_parameters(netmodel, args,
                                      useDeconvGroup=exp_args.useDeconvGroup)
    optimizer = torch.optim.Adam(params, args.lr, weight_decay=args.weightdecay)

    if exp_args.init:
        # Copy weights from a pretrained MobileNetV2 backbone. Keys are matched
        # by position; copying deliberately stops at the first shape mismatch
        # (only the common prefix of the two state dicts is transferred).
        pretrained_state_dict = torch.load('pretrained_mobilenetv2_base.pth')
        pretrained_state_dict_keys = list(pretrained_state_dict.keys())
        netmodel_state_dict = netmodel.state_dict()
        netmodel_state_dict_keys = list(netmodel_state_dict.keys())
        print("pretrain keys: ", len(pretrained_state_dict_keys))
        print("netmodel keys: ", len(netmodel_state_dict_keys))
        weights_load = {}
        # zip (instead of indexing by range) avoids an IndexError when the
        # pretrained dict has more keys than the model
        for src_key, dst_key in zip(pretrained_state_dict_keys,
                                    netmodel_state_dict_keys):
            if pretrained_state_dict[src_key].shape == \
                    netmodel_state_dict[dst_key].shape:
                weights_load[dst_key] = pretrained_state_dict[src_key]
                print('init model', dst_key, 'from pretrained', src_key)
            else:
                break
        print("init len is:", len(weights_load))
        netmodel_state_dict.update(weights_load)
        netmodel.load_state_dict(netmodel_state_dict)
        print("load model init finish...")

    # Defaults; overwritten on a successful resume. Setting them up front fixes
    # a NameError in the original when resume was requested but no checkpoint
    # file existed (minLoss/gap were left undefined).
    minLoss = 10000
    gap = 0
    if exp_args.resume:
        bestModelFile = os.path.join(exp_args.model_root, 'model_best.pth.tar')
        if os.path.isfile(bestModelFile):
            checkpoint = torch.load(bestModelFile)
            netmodel.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            gap = checkpoint['epoch']
            minLoss = checkpoint['minLoss']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                bestModelFile, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(bestModelFile))

    for epoch in range(gap, 2000):
        adjust_learning_rate(optimizer, epoch, args, multiple)
        print('===========> training <===========')
        train(dataLoader_train, netmodel, optimizer, epoch, logger_train,
              exp_args)
        print('===========> testing <===========')
        loss = test(dataLoader_test, netmodel, optimizer, epoch, logger_test,
                    exp_args)
        print("loss: ", loss, minLoss)
        is_best = False
        if loss < minLoss:
            minLoss = loss
            is_best = True
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'minLoss': minLoss,
                'state_dict': netmodel.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, is_best, exp_args.model_root)
def main(args):
    """Train the TensorFlow port of PortraitNet and export a frozen graph.

    Loads the yaml config named by ``args.config_path``, builds PyTorch data
    loaders (used only as a data feed), constructs the TF graph with a
    softmax cross-entropy loss, trains with Adam under an exponentially
    decaying learning rate, and finally writes the graph to ``model.pb``.
    """
    print('===========> loading config <============')
    config_path = args.config_path
    print("config path: ", config_path)
    with open(config_path, 'rb') as f:
        cont = f.read()
    cf = load(cont)

    exp_args = edict()
    exp_args.istrain = cf['istrain']  # set the mode
    exp_args.task = cf['task']  # only support 'seg' now
    exp_args.datasetlist = cf['datasetlist']
    exp_args.model_root = cf['model_root']
    exp_args.data_root = cf['data_root']
    exp_args.file_root = cf['file_root']
    # the height of input images, default=224
    exp_args.input_height = cf['input_height']
    # the width of input images, default=224
    exp_args.input_width = cf['input_width']
    # if video==True, add prior channel for input images, default=False
    exp_args.video = cf['video']
    # the probability to set empty prior channel, default=0.5
    exp_args.prior_prob = cf['prior_prob']
    # whether to add boundary auxiliary loss, default=False
    exp_args.addEdge = cf['addEdge']
    # the weight of boundary auxiliary loss, default=0.1
    exp_args.edgeRatio = cf['edgeRatio']
    # whether to add consistency constraint loss, default=False
    exp_args.stability = cf['stability']
    # whether to use KL loss in consistency constraint loss, default=True
    exp_args.use_kl = cf['use_kl']
    # temperature in consistency constraint loss, default=1
    exp_args.temperature = cf['temperature']
    # the weight of consistency constraint loss, default=2
    exp_args.alpha = cf['alpha']
    # input normalization parameters
    exp_args.padding_color = cf['padding_color']
    exp_args.img_scale = cf['img_scale']
    # BGR order, image mean, default=[103.94, 116.78, 123.68]
    exp_args.img_mean = cf['img_mean']
    # BGR order, image val, default=[1/0.017, 1/0.017, 1/0.017]
    exp_args.img_val = cf['img_val']
    # whether to use pretrained model to init portraitnet
    exp_args.init = cf['init']
    # whether to continue training
    exp_args.resume = cf['resume']
    # if useUpsample==True, use nn.Upsample in decoder, else nn.ConvTranspose2d
    exp_args.useUpsample = cf['useUpsample']
    # if useDeconvGroup==True, set groups=input_channel in nn.ConvTranspose2d
    exp_args.useDeconvGroup = cf['useDeconvGroup']

    print('===========> loading data <===========')
    # set testing dataset (kept with the original loader settings)
    exp_args.istrain = False
    dataset_test = Human(exp_args)
    dataLoader_test = torch.utils.data.DataLoader(dataset_test,
                                                  batch_size=args.batchsize,
                                                  shuffle=True,
                                                  num_workers=args.workers)
    print("image number in testing: ", len(dataset_test))

    # set training dataset
    exp_args.istrain = True
    dataset_train = Human(exp_args)
    dataLoader_train = torch.utils.data.DataLoader(dataset_train,
                                                   batch_size=args.batchsize,
                                                   shuffle=True,
                                                   num_workers=args.workers)
    print("image number in training: ", len(dataset_train))
    print("finish load dataset ...")

    print('===========> loading model <===========')
    # train our model: portraitnet (TensorFlow port)
    import model_mobilenetv2_seg_small_tf as modellib
    netmodel = modellib.MobileNetV2(n_class=2,
                                    addEdge=exp_args.addEdge,
                                    channelRatio=1.0,
                                    minChannel=16,
                                    train=exp_args.istrain)
    print("finish load PortraitNet ...")

    with tf.variable_scope('Inputs'):
        # NHWC float input and integer per-pixel labels, fixed at 224x224
        x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3],
                           name='x_input')
        y = tf.placeholder(dtype=tf.int64, shape=[None, 224, 224],
                           name='y_input')

    pred = netmodel.build(x)

    with tf.variable_scope('loss'):
        softmaxs = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=pred)
        softmaxs_loss = tf.reduce_mean(softmaxs)

    gap = 0
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.001, global_step, 20, 0.95,
                                               staircase=True)
    with tf.variable_scope('train'):
        # pass global_step so each optimizer step increments it; without this
        # the exponential decay never advances and the LR is stuck at 0.001
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            softmaxs_loss, global_step=global_step)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        for epoch in range(gap, 2000):
            print('===========> training <===========')
            for i, (input_ori, input, edge, mask) in enumerate(dataLoader_train):
                # torch CHW batch -> numpy NHWC for the TF graph
                input_x = input.cpu().detach().numpy()
                input_x = np.transpose(input_x, (0, 2, 3, 1))
                input_y = mask.cpu().detach().numpy()
                _, loss_ = sess.run([optimizer, softmaxs_loss],
                                    {x: input_x, y: input_y})
                # log only every printfreq steps (the original also printed the
                # raw loss unconditionally on every step, which was redundant)
                if i % args.printfreq == 0:
                    print('Step:', epoch, '| train loss: %.4f' % loss_)

        # export the (non-frozen) graph definition for later conversion
        constant_graph = tf.get_default_graph().as_graph_def()
        with tf.gfile.FastGFile('model.pb', mode='wb') as f:
            f.write(constant_graph.SerializeToString())
def main(args):
    """Train the TF-lite variant of PortraitNet with an edge (focal) auxiliary loss.

    Loads the yaml config named by ``args.config_path``, feeds batches from a
    PyTorch DataLoader into a TF graph whose loss is softmax cross-entropy plus
    ``focal_loss * edgeRatio``, and saves a checkpoint whenever a new minimum
    loss below 0.1 is reached.
    """
    print('===========> loading config <============')
    config_path = args.config_path
    print("config path: ", config_path)
    with open(config_path, 'rb') as f:
        cont = f.read()
    cf = load(cont)

    exp_args = edict()
    exp_args.istrain = cf['istrain']  # set the mode
    exp_args.task = cf['task']  # only support 'seg' now
    exp_args.datasetlist = cf['datasetlist']
    exp_args.model_root = cf['model_root']
    exp_args.data_root = cf['data_root']
    exp_args.file_root = cf['file_root']
    # the height of input images, default=224
    exp_args.input_height = cf['input_height']
    # the width of input images, default=224
    exp_args.input_width = cf['input_width']
    # if video==True, add prior channel for input images, default=False
    exp_args.video = cf['video']
    # the probability to set empty prior channel, default=0.5
    exp_args.prior_prob = cf['prior_prob']
    # whether to add boundary auxiliary loss, default=False
    exp_args.addEdge = cf['addEdge']
    # the weight of boundary auxiliary loss, default=0.1
    exp_args.edgeRatio = cf['edgeRatio']
    # whether to add consistency constraint loss, default=False
    exp_args.stability = cf['stability']
    # whether to use KL loss in consistency constraint loss, default=True
    exp_args.use_kl = cf['use_kl']
    # temperature in consistency constraint loss, default=1
    exp_args.temperature = cf['temperature']
    # the weight of consistency constraint loss, default=2
    exp_args.alpha = cf['alpha']
    # input normalization parameters
    exp_args.padding_color = cf['padding_color']
    exp_args.img_scale = cf['img_scale']
    # BGR order, image mean, default=[103.94, 116.78, 123.68]
    exp_args.img_mean = cf['img_mean']
    # BGR order, image val, default=[1/0.017, 1/0.017, 1/0.017]
    exp_args.img_val = cf['img_val']
    # whether to use pretrained model to init portraitnet
    exp_args.init = cf['init']
    # whether to continue training
    exp_args.resume = cf['resume']
    # if useUpsample==True, use nn.Upsample in decoder, else nn.ConvTranspose2d
    exp_args.useUpsample = cf['useUpsample']
    # if useDeconvGroup==True, set groups=input_channel in nn.ConvTranspose2d
    exp_args.useDeconvGroup = cf['useDeconvGroup']

    print('===========> loading data <===========')
    # set training dataset
    exp_args.istrain = True
    dataset_train = Human(exp_args)
    dataLoader_train = torch.utils.data.DataLoader(dataset_train,
                                                   batch_size=args.batchsize,
                                                   shuffle=True,
                                                   num_workers=args.workers)
    print("image number in training: ", len(dataset_train))
    print("finish load dataset ...")

    print('===========> loading model <===========')
    # train our model: portraitnet (TF-lite variant)
    import model_mobilenetv2_seg_small_tf_lite as modellib
    netmodel = modellib.MobileNetV2(n_class=2,
                                    addEdge=exp_args.addEdge,
                                    channelRatio=1.0,
                                    minChannel=16)
    print("finish load PortraitNet ...")

    with tf.variable_scope('Inputs'):
        x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3],
                           name='x_input')
        y = tf.placeholder(dtype=tf.int64, shape=[None, 224, 224],
                           name='y_input')
        z = tf.placeholder(dtype=tf.int64, shape=[None, 224, 224],
                           name='z_input')

    pred, edge = netmodel.build(x)
    # named softmax output so the exported graph exposes 'result'
    result = tf.nn.softmax(pred, name='result', dim=-1)

    with tf.variable_scope('loss'):
        softmaxs = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=pred)
        softmaxsLoss = tf.reduce_mean(softmaxs, name="mean")
        # boundary auxiliary loss on the edge head, weighted by edgeRatio
        focalLoss = focal_loss(edge, z) * exp_args.edgeRatio
        lossSum = softmaxsLoss + focalLoss

    gap = 0
    minloss = 10000.0
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.001, global_step, 20, 0.95,
                                               staircase=True)
    with tf.variable_scope('train'):
        # pass global_step so each optimizer step increments it; without this
        # the exponential decay never advances and the LR is stuck at 0.001
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            lossSum, global_step=global_step)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()
        for epoch in range(gap, 2000):
            print('===========> training <==========={}/{}'.format(epoch + 1,
                                                                   2000))
            # loop locals renamed: the original reused `edge` (shadowing the
            # graph tensor above) and `input` (shadowing the builtin)
            for i, (input_ori, img, edge_batch, mask) in enumerate(dataLoader_train):
                input_x = img.cpu().detach().numpy()
                input_x = np.transpose(input_x, (0, 2, 3, 1))
                input_y = mask.cpu().detach().numpy()
                input_z = edge_batch.cpu().detach().numpy()
                _, loss_ = sess.run([optimizer, lossSum],
                                    {x: input_x, y: input_y, z: input_z})
                if loss_ < minloss:
                    minloss = loss_
                    # checkpoint only once the loss is already small
                    if minloss < 0.1:
                        saver.save(sess, './Model/tf_lite_4_{}'.format(epoch))
                        print('Save')
            print("minloss:", minloss)
def main():
    """Evaluate a saved TensorFlow PortraitNet checkpoint on the test set.

    Restores the graph from ``Model/model.meta`` + ``Model/model``, looks up
    the input/label/prediction tensors by name, and prints the softmax
    cross-entropy loss for each test batch.
    """
    print('===========> loading config <============')
    config_path = '/home/yupeng/Program/python/PortraitNet/config/model_mobilenetv2_without_auxiliary_losses.yaml'
    print("config path: ", config_path)
    with open(config_path, 'rb') as f:
        cont = f.read()
    cf = load(cont)

    exp_args = edict()
    exp_args.istrain = cf['istrain']  # set the mode
    exp_args.task = cf['task']  # only support 'seg' now
    exp_args.datasetlist = cf['datasetlist']
    exp_args.model_root = cf['model_root']
    exp_args.data_root = cf['data_root']
    exp_args.file_root = cf['file_root']
    # the height of input images, default=224
    exp_args.input_height = cf['input_height']
    # the width of input images, default=224
    exp_args.input_width = cf['input_width']
    # if video==True, add prior channel for input images, default=False
    exp_args.video = cf['video']
    # the probability to set empty prior channel, default=0.5
    exp_args.prior_prob = cf['prior_prob']
    # whether to add boundary auxiliary loss, default=False
    exp_args.addEdge = cf['addEdge']
    # the weight of boundary auxiliary loss, default=0.1
    exp_args.edgeRatio = cf['edgeRatio']
    # whether to add consistency constraint loss, default=False
    exp_args.stability = cf['stability']
    # whether to use KL loss in consistency constraint loss, default=True
    exp_args.use_kl = cf['use_kl']
    # temperature in consistency constraint loss, default=1
    exp_args.temperature = cf['temperature']
    # the weight of consistency constraint loss, default=2
    exp_args.alpha = cf['alpha']
    # input normalization parameters
    exp_args.padding_color = cf['padding_color']
    exp_args.img_scale = cf['img_scale']
    # BGR order, image mean, default=[103.94, 116.78, 123.68]
    exp_args.img_mean = cf['img_mean']
    # BGR order, image val, default=[1/0.017, 1/0.017, 1/0.017]
    exp_args.img_val = cf['img_val']
    # whether to use pretrained model to init portraitnet
    exp_args.init = cf['init']
    # whether to continue training
    exp_args.resume = cf['resume']
    # if useUpsample==True, use nn.Upsample in decoder, else nn.ConvTranspose2d
    exp_args.useUpsample = cf['useUpsample']
    # if useDeconvGroup==True, set groups=input_channel in nn.ConvTranspose2d
    exp_args.useDeconvGroup = cf['useDeconvGroup']

    print('===========> loading data <===========')
    # set testing dataset. The original referenced `args.batchsize` /
    # `args.workers`, but this main() takes no arguments, so that was a
    # guaranteed NameError — use fixed evaluation-friendly settings instead.
    exp_args.istrain = False
    dataset_test = Human(exp_args)
    dataLoader_test = torch.utils.data.DataLoader(dataset_test,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=1)
    print("image number in testing: ", len(dataset_test))

    with tf.Session() as sess:
        # restore the trained graph and variables saved by the training script
        saver = tf.train.import_meta_graph('Model/model.meta')
        saver.restore(sess, "Model/model")
        graph = tf.get_default_graph()
        x = graph.get_tensor_by_name("Inputs/x_input:0")
        y = graph.get_tensor_by_name("Inputs/y_input:0")
        pred = graph.get_tensor_by_name("pred:0")

        # Build the loss op ONCE, wired to the restored tensors. The original
        # rebuilt these ops inside the loop (unbounded graph growth) and
        # printed the Tensor object itself rather than its evaluated value.
        softmaxs = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=pred)
        softmaxs_loss = tf.reduce_mean(softmaxs)

        print('===========> testing <===========')
        for i, (input_ori, img, edge, mask) in enumerate(dataLoader_test):
            # torch CHW batch -> numpy NHWC for the TF graph
            input_x = img.cpu().detach().numpy()
            input_x = np.transpose(input_x, (0, 2, 3, 1))
            input_y = mask.cpu().detach().numpy()
            loss_val = sess.run(softmaxs_loss, {x: input_x, y: input_y})
            print(loss_val)