def __init__(self):
    """Build the VGG19-backed discriminator.

    The stock VGG19 head (avgpool + classifier) is stripped so only the
    convolutional feature extractor remains. `intermediate` projects one
    flattened 512x2x2 feature patch to 512 dims; `classifier` consumes
    9 * 512 concatenated features (presumably nine such patches — confirm
    against the forward pass, which is not visible here).
    """
    super(Discriminator, self).__init__()

    # Pretrained VGG19 backbone with its pooling and classifier removed.
    self.feature = vgg19(num_classes=1000, pretrained='imagenet', progress=True)
    self.feature.classifier = nn.Sequential()
    self.feature.avgpool = nn.Sequential()

    # Flattened size of a single 512-channel 2x2 feature patch.
    flat_features = 512 * 2 * 2
    self.intermediate = nn.Sequential(
        nn.Linear(flat_features, 512),
        nn.ReLU(True),
        nn.Dropout(hparams.drop_rate),
    )
    self.classifier = nn.Sequential(
        nn.Linear(9 * 512, 4096),
        nn.ReLU(True),
        nn.Dropout(hparams.drop_rate),
        nn.Linear(4096, 4096),
        nn.ReLU(True),
        nn.Dropout(hparams.drop_rate),
        nn.Linear(4096, 1000),
    )
def main():
    """Run image-classification inference with a chosen PaddlePaddle model.

    Positional command-line arguments:
        data_list   -- text file listing one image path per line
        model       -- architecture name (see `choices` below)
        params_path -- gzipped tar holding trained parameters

    NOTE(review): uses the legacy Python 2 `print` statement and the
    PaddlePaddle v1 API; this function cannot run under Python 3 as-is.
    """
    # parse the argument
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'data_list',
        help='The path of data list file, which consists of one image path per line'
    )
    parser.add_argument(
        'model',
        help='The model for image classification',
        choices=[
            'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet',
            'inception-resnet-v2', 'inception_v4', 'xception'
        ])
    parser.add_argument(
        'params_path', help='The file which stores the parameters')
    args = parser.parse_args()

    # PaddlePaddle init
    paddle.init(use_gpu=True, trainer_count=1)

    # Flattened dense image input; DATA_DIM is a module-level constant.
    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(DATA_DIM))

    # Build the requested network topology.
    if args.model == 'alexnet':
        out = alexnet.alexnet(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg13':
        out = vgg.vgg13(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg16':
        out = vgg.vgg16(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg19':
        out = vgg.vgg19(image, class_dim=CLASS_DIM)
    elif args.model == 'resnet':
        out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM)
    elif args.model == 'googlenet':
        # googlenet also returns two auxiliary heads, unused at inference.
        out, _, _ = googlenet.googlenet(image, class_dim=CLASS_DIM)
    elif args.model == 'inception-resnet-v2':
        # this topology only supports 299x299 or 331x331 inputs
        assert DATA_DIM == 3 * 331 * 331 or DATA_DIM == 3 * 299 * 299
        out = inception_resnet_v2.inception_resnet_v2(
            image, class_dim=CLASS_DIM, dropout_rate=0.5, data_dim=DATA_DIM)
    elif args.model == 'inception_v4':
        out = inception_v4.inception_v4(image, class_dim=CLASS_DIM)
    elif args.model == 'xception':
        out = xception.xception(image, class_dim=CLASS_DIM)

    # load parameters
    with gzip.open(args.params_path, 'r') as f:
        parameters = paddle.parameters.Parameters.from_tar(f)

    # Load each listed image: resize to 256, crop to 224, no augmentation.
    file_list = [line.strip() for line in open(args.data_list)]
    test_data = [(paddle.image.load_and_transform(image_file, 256, 224, False)
                  .flatten().astype('float32'), ) for image_file in file_list]
    probs = paddle.infer(
        output_layer=out, parameters=parameters, input=test_data)
    # argsort of the negated probabilities ranks labels most-probable first
    lab = np.argsort(-probs)
    for file_name, result in zip(file_list, lab):
        print "Label of %s is: %d" % (file_name, result[0])
def init_net(self):
    """Instantiate the backbone named in self.kwargs["net"] and adapt its head.

    Pretrained weights are requested, the input-channel count follows the
    configured imagery bands, and the final fully-connected layer is replaced
    so it emits `self.ncats` classes. run_type == 2 freezes all but the last
    few parameter tensors (fixed feature extractor).
    """
    net_args = {
        "pretrained": True,
        "n_input_channels": len(self.kwargs["static"]["imagery_bands"])
    }
    # https://pytorch.org/docs/stable/torchvision/models.html
    backbones = {
        "resnet18": resnet.resnet18,
        "resnet34": resnet.resnet34,
        "resnet50": resnet.resnet50,
        "resnet101": resnet.resnet101,
        "resnet152": resnet.resnet152,
        "vgg11": vgg.vgg11,
        "vgg11_bn": vgg.vgg11_bn,
        "vgg13": vgg.vgg13,
        "vgg13_bn": vgg.vgg13_bn,
        "vgg16": vgg.vgg16,
        "vgg16_bn": vgg.vgg16_bn,
        "vgg19": vgg.vgg19,
        "vgg19_bn": vgg.vgg19_bn,
    }
    net_name = self.kwargs["net"]
    if net_name not in backbones:
        raise ValueError("Invalid network specified: {}".format(net_name))
    self.model = backbones[net_name](**net_args)

    # run type: 1 = fine tune, 2 = fixed feature extractor
    # - replace run type option with "# of layers to fine tune"
    if self.kwargs["run_type"] == 2:
        layer_count = len(list(self.model.parameters()))
        # Freeze everything except the last 4 parameter tensors.
        for layer, param in enumerate(self.model.parameters()):
            if layer <= layer_count - 5:
                param.requires_grad = False

    # Parameters of newly constructed modules have requires_grad=True by default.
    # Swap the classification head for one sized to self.ncats categories.
    # see: https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
    if "resnet" in net_name:
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, self.ncats)
    elif "vgg" in net_name:
        num_ftrs = self.model.classifier[6].in_features
        self.model.classifier[6] = nn.Linear(num_ftrs, self.ncats)
def __init__(self, style_weight=STYLE_WEIGHT):
    """AdaIN style-transfer model: shared VGG19 encoder + reversed-VGG decoder.

    The encoder is truncated at conv4_1 and used for both content and style
    inputs; the decoder reconstructs an image from 512-channel features.
    """
    super(StyleTransferModel, self).__init__()

    # NOTE: flip this on if you want to download the complete vgg19 weights
    # instead of loading the partial local snapshot below.
    download_full_vgg19 = False
    self.enc_net = vgg19(pretrained=download_full_vgg19,
                         end_with='conv4_1',
                         name='content_and_style_enc')
    if not download_full_vgg19 and osp.exists(VGG19_PARTIAL_WEIGHTS_PATH):
        self.enc_net.load_weights(VGG19_PARTIAL_WEIGHTS_PATH, in_order=False)
        tl.logging.info(f"Encoder weights loaded from: {VGG19_PARTIAL_WEIGHTS_PATH}")

    # NOTE: batch_norm=False->True will lower quality of the generated image = may need retrain
    self.dec_net = vgg19_rev(pretrained=False,
                             batch_norm=USE_BATCH_NORM,
                             input_depth=512,
                             name='stylized_dec')
    if osp.exists(DEC_LATEST_WEIGHTS_PATH):
        self.dec_net.load_weights(DEC_LATEST_WEIGHTS_PATH, skip=True)
        tl.logging.info(f"Decoder weights loaded from: {DEC_LATEST_WEIGHTS_PATH}")

    self.style_weight = style_weight
    # Loss slots; populated during the forward/training pass.
    self.content_loss = None
    self.style_loss = None
    self.loss = None
def test_conv_and_deconv():
    """Round-trip one image through encoder (to conv4_1) and decoder.

    No AdaIN in between — this only sanity-checks that the conv/deconv pair
    can reconstruct pixels from raw content features.
    """
    VGG19_WEIGHTS_PATH = 'pretrained_models/predefined_vgg19_endwith(conv4_1)_weights.h5'
    VGG19_REV_WEIGHTS_PATH = 'pretrained_models/dec_best_weights (before use DeConv2d).h5'
    TEMP_IMAGE_PATH = './temp_images/53154.jpg'

    # Build the encoder/decoder pair and load their saved weights.
    encoder = vgg19(pretrained=False, end_with='conv4_1')
    decoder = vgg19_rev(pretrained=False, end_with='conv1_1', input_depth=512)
    encoder.load_weights(VGG19_WEIGHTS_PATH)
    decoder.load_weights(VGG19_REV_WEIGHTS_PATH, skip=True)
    encoder.eval()
    decoder.eval()

    # Decode the content features directly and save the reconstruction.
    source = imread(TEMP_IMAGE_PATH, mode='RGB')
    source = imresize_square(source, long_side=512, interp='nearest')
    features = encoder([source])
    reconstructed = decoder(features)
    imsave(TEMP_IMAGE_PATH + '!generated.jpg', reconstructed[0].numpy())
def __init__(self, *args, **kwargs):
    """Inference-time AdaIN model: one shared VGG19 encoder plus a decoder."""
    super(StyleTransferModel, self).__init__(*args, **kwargs)
    # NOTE: you may use a vgg19 instance for both content encoder and style
    # encoder, just as in train.py:
    # self.enc_c_net = vgg19(pretrained=True, end_with='conv4_1', name='content')
    # self.enc_s_net = vgg19(pretrained=True, end_with='conv4_1', name='style')
    self.enc_net = vgg19(pretrained=False,
                         end_with='conv4_1',
                         name='content_and_style_enc')
    if os.path.exists(VGG19_PARTIAL_WEIGHTS_PATH):
        self.enc_net.load_weights(VGG19_PARTIAL_WEIGHTS_PATH, in_order=False)

    self.dec_net = vgg19_rev(pretrained=False,
                             end_with='conv1_1',
                             input_depth=512,
                             name='stylized_dec')
    if os.path.exists(DEC_BEST_WEIGHTS_PATH):
        self.dec_net.load_weights(DEC_BEST_WEIGHTS_PATH, skip=True)
def load_model(args):
    """Build the network selected by ``args.model`` and move it to ``device``.

    Supported names: vgg11/vgg13/vgg16/vgg19 and their ``modified_``
    counterparts.

    Returns:
        The instantiated model, placed on the module-level ``device``.

    Raises:
        ValueError: for an unrecognized ``args.model``.  (The original mixed
        ``if``/``if``/``if``/``elif`` chain silently fell through on unknown
        names and crashed with ``UnboundLocalError`` at ``return model``.)
    """
    # One dispatch table instead of the inconsistent conditional chain.
    builders = {
        'vgg11': vgg.vgg11,
        'vgg13': vgg.vgg13,
        'vgg16': vgg.vgg16,
        'vgg19': vgg.vgg19,
        'modified_vgg11': modified_vgg.vgg11,
        'modified_vgg13': modified_vgg.vgg13,
        'modified_vgg16': modified_vgg.vgg16,
        'modified_vgg19': modified_vgg.vgg19,
    }
    if args.model not in builders:
        raise ValueError("Unknown model: {}".format(args.model))
    model = builders[args.model]().to(device)
    return model
def main():
    """Gatys-style neural style transfer optimized directly in pixel space.

    Loads a pretrained VGG19 as the loss network, then runs L-BFGS on a copy
    of the content image, minimizing weighted content + style losses captured
    by hook-like loss modules installed by ``model.get_layer_out``.
    """
    parser = argparse.ArgumentParser(description='Pytorch implementation of Neural Artistic Style Transfer')
    parser.add_argument('--w_content', default=1.0, type=float, help='Weight for content loss')
    parser.add_argument('--w_style', default=10000.0, type=float, help='Weight for style loss')
    parser.add_argument('--img_content', default='content.jpg', help='Image name for content')
    parser.add_argument('--img_style', default='style.jpg', help='Image name for style')
    parser.add_argument('--iteration', '-i', default=50, type=int, help='Total iteration')
    args = parser.parse_args()
    ### Setting parameters ###
    w_content = args.w_content
    w_style = args.w_style
    iteration = args.iteration
    ### Load Model ###
    # Frozen pretrained VGG19 used purely as a feature/loss network.
    net = vgg.vgg19(pretrained=True).cuda().eval()
    ### Load Images ###
    image_content, image_style = model.image_loader(args.img_content, args.img_style)
    # The optimized variable is a clone of the content image.
    image_modify = image_content.clone()
    image_modify.requires_grad = True
    ### Iteration ###
    # get_layer_out rebuilds the net with content/style loss modules attached.
    net_m, content_losses, style_losses = model.get_layer_out(net, image_content, image_style)
    optimi = optim.LBFGS([image_modify])
    for epoch in range(iteration):
        # L-BFGS re-evaluates the objective several times per step, hence
        # the closure: forward pass populates the loss modules' .loss fields.
        def closure():
            optimi.zero_grad()
            net_m(image_modify)
            content_loss_sum = 0.0
            style_loss_sum = 0.0
            for c in content_losses:
                content_loss_sum += c.loss
            for s in style_losses:
                style_loss_sum += s.loss
            loss = style_loss_sum * w_style + content_loss_sum * w_content
            loss.backward()
            if True:
                print('epoch: {}, loss: {} / {} / {}'.format(epoch, loss.data, style_loss_sum.data*w_style, content_loss_sum.data*w_content))
            return loss
        optimi.step(closure)
        # NOTE(review): placement of the two lines below is ambiguous in the
        # original (collapsed) formatting — they may belong after the loop,
        # saving only the final image. The per-epoch filename suggests they
        # run each iteration; confirm against the upstream repository.
        # NOTE(review): 'outout' in the filename looks like a typo for
        # 'output' — left as-is since it is runtime behavior.
        image_modify.data.clamp_(0, 1)
        utils.save_image(torch.squeeze(image_modify), 'outout{}.jpg'.format(epoch))
def test_test_model_single_call():
    """Stylize one content/style image pair with a single encoder/decoder call.

    Encodes both images to conv4_1 features, blends them with AdaIN
    (alpha=1, i.e. fully stylized statistics), decodes, and writes the
    result under TEST_OUTPUT_PATH.
    """
    from vgg import vgg19, vgg19_rev
    import os.path as osp
    import tensorlayer as tl

    VGG19_PARTIAL_WEIGHTS_PATH = 'pretrained_models/predefined_vgg19_endwith(conv4_1)_weights.h5'
    DEC_BEST_WEIGHTS_PATH = 'pretrained_models/dec_best_weights.h5'
    CONTENT_DATA_PATH = './test_images/content'
    STYLE_DATA_PATH = './test_images/style'
    test_content_filenames = ['brad_pitt_01.jpg']
    test_style_filenames = ['cat.jpg']
    TEST_INPUT_CONSTRAINTED_SIZE = 800
    TEST_OUTPUT_PATH = './test_images/output'

    tl.logging.set_verbosity(tl.logging.DEBUG)

    # Build the pair and restore their weights.
    encoder = vgg19(pretrained=False, end_with='conv4_1')
    encoder.load_weights(VGG19_PARTIAL_WEIGHTS_PATH, in_order=False)
    decoder = vgg19_rev(pretrained=False, batch_norm=False, input_depth=512)
    decoder.load_weights(DEC_BEST_WEIGHTS_PATH, skip=True)

    i = 0  # only test 1 pair of input
    test_content = utils.imread(
        osp.join(CONTENT_DATA_PATH, test_content_filenames[i]))
    test_style = utils.imread(
        osp.join(STYLE_DATA_PATH, test_style_filenames[i]))
    # (BGR->RGB conversion was moved into utils.imread / utils.imsave.)

    content_features = encoder(test_content, is_train=False)
    style_features = encoder(test_style, is_train=False)
    target_features = utils.AdaIN(content_features, style_features, alpha=1)
    del content_features, style_features

    generated = decoder(target_features, is_train=False)

    import tensorflow as tf
    # Cast float tensors down to uint8 before writing to disk.
    if isinstance(generated, tf.Tensor):
        if generated.dtype == tf.float32:
            generated = tf.cast(generated, tf.uint8)
    generated = generated[0].numpy()

    # Output name is "<style-stem>+<content-stem>.jpg".
    saved_path = f"{osp.splitext(test_style_filenames[i])[0]}+{osp.splitext(test_content_filenames[i])[0]}"
    saved_path = osp.join(TEST_OUTPUT_PATH, f"{saved_path}.jpg")
    utils.imsave(saved_path, generated)
    tl.logging.info(f"saved_path = {saved_path}")
    tl.logging.info(f"generated.shape = {generated.shape}")
def test_test_arbitrary_sized_inputs():
    """Stylize arbitrarily sized images one pair at a time via a generator.

    Images are fed with batch dimension 1 so each pair may have its own
    spatial size; results are written under TEMP_IMAGE_PATH.
    """
    from vgg import vgg19, vgg19_rev
    import os.path as osp
    import tensorlayer as tl

    DEC_LATEST_WEIGHTS_PATH = 'pretrained_models/dec_latest_weights.h5'
    STYLE_LAYERS = ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1')  # for Encoders
    CONTENT_DATA_PATH = './dataset/content_samples'  # COCO_train_2014/
    STYLE_DATA_PATH = './dataset/style_samples'  # wiki_all_images/
    test_content_filenames = ['000000532397.jpg']  # '000000048289.jpg', '000000526781.jpg'
    test_style_filenames = ['53154.jpg']  # '2821.jpg', '216.jpg'
    TEST_INPUT_CONSTRAINTED_SIZE = 800
    TEMP_IMAGE_PATH = './temp_images/'

    tl.logging.set_verbosity(tl.logging.DEBUG)

    encoder = vgg19(pretrained=True, end_with='conv4_1')
    # NOTE: batch_norm=True will lower quality of the generated image = need retrain
    decoder = vgg19_rev(pretrained=False, batch_norm=False, input_depth=512)
    if osp.exists(DEC_LATEST_WEIGHTS_PATH):
        decoder.load_weights(DEC_LATEST_WEIGHTS_PATH, skip=True)
    encoder.eval()
    decoder.eval()

    # Loop once to confirm the generator can be rebuilt and reused.
    for epoch in range(1):
        pairs = list(zip(test_content_filenames, test_style_filenames))
        test_inputs_gen = utils.single_inputs_generator(
            pairs, CONTENT_DATA_PATH, STYLE_DATA_PATH,
            TEST_INPUT_CONSTRAINTED_SIZE)
        for i, (test_content, test_style) in enumerate(test_inputs_gen):
            # shape=[1, w, h, c]: arbitrary sized test images, one by one
            content_features = encoder(test_content)
            style_features = encoder(test_style)
            target_features = utils.AdaIN(content_features, style_features, alpha=1)
            del content_features, style_features
            generated_images = decoder(target_features)
            paired_name = f"{osp.splitext(test_style_filenames[i])[0]}+{osp.splitext(test_content_filenames[i])[0]}"
            utils.imsave(
                osp.join(TEMP_IMAGE_PATH, f"temp_{paired_name}_epoch{epoch}.jpg"),
                generated_images[0].numpy())
# Start experiment logging.
NeptuneLog()

# Dispatch table mapping the configured architecture name to its builder.
_vgg_builders = {
    'vgg11': vgg.vgg11,
    'vgg11_bn': vgg.vgg11_bn,
    'vgg13': vgg.vgg13,
    'vgg13_bn': vgg.vgg13_bn,
    'vgg16': vgg.vgg16,
    'vgg16_bn': vgg.vgg16_bn,
    'vgg19': vgg.vgg19,
    'vgg19_bn': vgg.vgg19_bn,
}
# Like the original chain, an unrecognized model_name leaves `model` unbound
# and fails on the next line.
if model_name in _vgg_builders:
    model = _vgg_builders[model_name](pretrained=pretrain_check)

model.eval()
model = torch.nn.DataParallel(model).cuda()

optimizer = optim.Adam(model.parameters(),
                       lr=learning_rate,
                       betas=(0.9, 0.999),
                       eps=1e-08,
                       weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, factor=0.01, patience=patience, mode='min')
def main():
    """Train and evaluate a (optionally binarized) VGG16/VGG19 classifier.

    Builds plain and augmented train/val loaders, trains for ``--epochs``
    epochs with Adam + StepLR, and reports accuracy on all four loaders
    after every epoch.

    Fixes vs. original: the ``--test-batch-size`` help text claimed
    "(default: 100)" while the actual default is 64; an unknown ``--model``
    now raises ValueError instead of crashing with UnboundLocalError.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch CIFAR-10 BNN')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=64, metavar='N',
                        help='input batch size for testing (default: 64)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--cuda-num', type=int, default=0,
                        help='Choses GPU number')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=200, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--weight-decay', type=float, default=0, metavar='W',
                        help='coefficient of L2 regulariztion')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--model', type=str, default='vgg16',
                        help='Model choice')
    parser.add_argument('--binarized', action='store_true', default=False,
                        help='Makes model binary')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device(
        "cuda:%d" % args.cuda_num if torch.cuda.is_available() else "cpu")
    print("Use device:", device)
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # NOTE(review): these normalization stats are the CIFAR-10 ones, applied
    # here to ImageNet data — confirm this is intentional.
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))
    # Shared augmentation pipeline for the "augmented" loaders.
    augmentation = [
        transforms.RandomAffine(degrees=35, shear=0.2),
        transforms.RandomCrop(224, padding=5),
        transforms.RandomHorizontalFlip(),
    ]

    # NOTE(review): recent torchvision rejects download=True for ImageNet;
    # the archives must be present locally.
    train_loader = torch.utils.data.DataLoader(
        datasets.ImageNet('../data/', split='train', download=True,
                          transform=transforms.Compose(
                              [transforms.ToTensor(), normalize])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    train_loader_aug = torch.utils.data.DataLoader(
        datasets.ImageNet('../data/', split='train', download=True,
                          transform=transforms.Compose(
                              augmentation + [transforms.ToTensor(), normalize])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.ImageNet('../data/', split='val', download=True,
                          transform=transforms.Compose(
                              [transforms.ToTensor(), normalize])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    test_loader_aug = torch.utils.data.DataLoader(
        datasets.ImageNet('../data/', split='val', download=True,
                          transform=transforms.Compose(
                              augmentation + [transforms.ToTensor(), normalize])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    if args.model == 'vgg16':
        model = vgg16(binarized=args.binarized).to(device)
    elif args.model == 'vgg19':
        model = vgg19(binarized=args.binarized).to(device)
    else:
        raise ValueError("Unsupported model: {}".format(args.model))

    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=50, gamma=0.5)  # managinng lr decay

    test_accuracy = []
    train_accuracy = []
    for epoch in range(1, args.epochs + 1):
        # NOTE(review): get_lr() is deprecated in newer torch in favor of
        # get_last_lr(); kept for compatibility with the pinned version.
        print('Epoch:', epoch, 'LR:', scheduler.get_lr())
        train(args, model, device, train_loader, optimizer, epoch)
        print("Train set:\n")
        test(args, model, device, train_loader, optimizer, epoch)
        print("Test set:\n")
        test(args, model, device, test_loader, optimizer, epoch)
        print("Train augmented set:\n")
        test(args, model, device, train_loader_aug, optimizer, epoch)
        print("Test augmented set:\n")
        test(args, model, device, test_loader_aug, optimizer, epoch)
        scheduler.step(epoch=epoch)
def main():
    """Train an SRGAN super-resolution model (TensorFlow 1.x graph mode).

    Phases: (1) sample and dump an input/target preview, (2) build the
    generator/discriminator/VGG19 perceptual-loss graph, (3) L1+VGG
    pretraining of the generator, (4) optional discriminator warm-up
    (currently disabled, range(0)), (5) adversarial training with decaying
    learning rate. Checkpoints and preview tiles are written periodically.
    """
    img_size = 96
    bs = 4
    val_size = 4
    trans_lr = 1e-4  # adversarial-phase learning rate, decayed each step
    start = time.time()
    batchgen = BatchGenerator(img_size=img_size, LRDir=TRAIN_LR_DIR, HRDir=TRAIN_HR_DIR, aug=True)
    valgen = BatchGenerator(img_size=img_size, LRDir=VAL_LR_DIR, HRDir=VAL_HR_DIR, aug=False)

    # Dump one LR/HR sample pair side-by-side as "input.png".
    # Images are in [-1, 1]; (x + 1) * 127.5 maps back to [0, 255].
    IN_, OUT_ = batchgen.getBatch(4)
    IN_ = tileImage(IN_)
    IN_ = cv2.resize(IN_, (img_size * 2 * 4, img_size * 2 * 4), interpolation=cv2.INTER_CUBIC)
    IN_ = (IN_ + 1) * 127.5
    OUT_ = tileImage(OUT_)
    OUT_ = cv2.resize(OUT_, (img_size * 4 * 2, img_size * 4 * 2))
    OUT_ = (OUT_ + 1) * 127.5
    Z_ = np.concatenate((IN_, OUT_), axis=1)
    cv2.imwrite("input.png", Z_)
    print("%s sec took sampling" % (time.time() - start))

    start = time.time()
    # Graph inputs: LR image, 4x HR target, and a learning-rate feed.
    x = tf.placeholder(tf.float32, [bs, img_size, img_size, 3])
    t = tf.placeholder(tf.float32, [bs, img_size * 4, img_size * 4, 3])
    lr = tf.placeholder(tf.float32)

    # Generator (train and reused eval graphs) and discriminator outputs.
    y = buildSRGAN_g(x)
    test_y = buildSRGAN_g(x, reuse=True, isTraining=False)
    fake_y = buildSRGAN_d(y)
    real_y = buildSRGAN_d(t, reuse=True)
    # Five VGG19 feature taps for the perceptual loss.
    vgg_y1, vgg_y2, vgg_y3, vgg_y4, vgg_y5 = vgg19(y)
    vgg_t1, vgg_t2, vgg_t3, vgg_t4, vgg_t5 = vgg19(t, reuse=True)

    # Standard GAN losses with epsilon for log stability.
    d_loss_real = tf.log((real_y) + 1e-10)
    d_loss_fake = tf.log(1 - (fake_y) + 1e-10)
    g_loss_fake = tf.reduce_mean(-tf.log((fake_y) + 1e-10)) * 2e-3
    # Weight-decay terms collected from each scope's regularizers.
    wd_g = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, scope="Generator")
    wd_d = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, scope="Discriminator")
    wd_g = tf.reduce_sum(wd_g)
    wd_d = tf.reduce_sum(wd_d)
    # Pixel loss (despite the name, this is an L2/MSE term).
    L1_loss = tf.reduce_mean(tf.square(y - t))
    # Per-layer perceptual terms with hand-tuned weights.
    e_1 = tf.reduce_mean(tf.square(vgg_y1 - vgg_t1)) * 2.8
    e_2 = tf.reduce_mean(tf.square(vgg_y2 - vgg_t2)) * 0.2
    e_3 = tf.reduce_mean(tf.square(vgg_y3 - vgg_t3)) * 0.08
    e_4 = tf.reduce_mean(tf.square(vgg_y4 - vgg_t4)) * 0.2
    e_5 = tf.reduce_mean(tf.square(vgg_y5 - vgg_t5)) * 75.0
    vgg_loss = (e_1 + e_2 + e_3 + e_4 + e_5) * 2e-7
    pre_loss = L1_loss + vgg_loss + wd_g
    g_loss = L1_loss + vgg_loss + g_loss_fake + wd_g
    d_loss = tf.reduce_mean(-(d_loss_fake + d_loss_real)) + wd_d

    # Optimizers; the comprehension variable x shadows the placeholder only
    # inside the comprehension scope (Python 3), so this is safe.
    g_pre = tf.train.AdamOptimizer(1e-4, beta1=0.5).minimize(
        pre_loss,
        var_list=[x for x in tf.trainable_variables() if "SRGAN_g" in x.name])
    g_opt = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(
        g_loss,
        var_list=[x for x in tf.trainable_variables() if "SRGAN_g" in x.name])
    # Discriminator learns at half the generator's rate.
    d_opt = tf.train.AdamOptimizer(lr / 2, beta1=0.5).minimize(
        d_loss,
        var_list=[x for x in tf.trainable_variables() if "SRGAN_d" in x.name])
    print("%.4f sec took building" % (time.time() - start))

    printParam(scope="SRGAN_g")
    printParam(scope="SRGAN_d")
    printParam(scope="vgg19")

    g_vars = [x for x in tf.trainable_variables() if "SRGAN_g" in x.name]
    d_vars = [x for x in tf.trainable_variables() if "SRGAN_d" in x.name]
    vgg_vars = [x for x in tf.trainable_variables() if "vgg19" in x.name]

    saver = tf.train.Saver()
    saver_vgg = tf.train.Saver(vgg_vars)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Resume from a full checkpoint if available; otherwise start fresh and
    # restore only the frozen VGG19 perceptual-loss weights.
    ckpt = tf.train.get_checkpoint_state(SAVE_DIR)
    if ckpt:  # is checkpoint exist
        last_model = ckpt.model_checkpoint_path
        #last_model = ckpt.all_model_checkpoint_paths[0]
        print("load " + last_model)
        saver.restore(sess, last_model)  # read variable data
        print("succeed restore model")
    else:
        init = tf.global_variables_initializer()
        sess.run(init)
        ckpt_vgg = tf.train.get_checkpoint_state('modelvgg')
        last_model = ckpt_vgg.model_checkpoint_path
        saver_vgg.restore(sess, last_model)
    print("%.4e sec took initializing" % (time.time() - start))

    hist = []
    hist_g = []
    hist_d = []
    start = time.time()

    # ---- Phase 1: generator pretraining (L1 + VGG perceptual loss) ----
    print("start pretrain")
    for p in range(50001):
        batch_images_x, batch_images_t = batchgen.getBatch(bs)
        tmp, gen_loss, L1, vgg = sess.run([g_pre, pre_loss, L1_loss, vgg_loss],
                                          feed_dict={
                                              x: batch_images_x,
                                              t: batch_images_t
                                          })
        hist.append(gen_loss)
        print("in step %s, pre_loss =%.4e, L1_loss=%.4e, vgg_loss=%.4e" %
              (p, gen_loss, L1, vgg))
        if p % 100 == 0:
            # Save a LR / generated / HR preview strip and a loss curve.
            batch_images_x, batch_images_t = batchgen.getBatch(bs)
            out = sess.run(test_y, feed_dict={x: batch_images_x})
            X_ = tileImage(batch_images_x[:4])
            Y_ = tileImage(out[:4])
            Z_ = tileImage(batch_images_t[:4])
            X_ = cv2.resize(X_, (img_size * 2 * 4, img_size * 2 * 4),
                            interpolation=cv2.INTER_CUBIC)
            X_ = (X_ + 1) * 127.5
            Y_ = (Y_ + 1) * 127.5
            Z_ = (Z_ + 1) * 127.5
            Z_ = np.concatenate((X_, Y_, Z_), axis=1)
            cv2.imwrite("{}/pre_{}.png".format(SAVEIM_DIR, p), Z_)
            fig = plt.figure(figsize=(8, 6), dpi=128)
            ax = fig.add_subplot(111)
            plt.title("Loss")
            plt.grid(which="both")
            plt.yscale("log")
            ax.plot(hist, label="gen_loss", linewidth=0.25)
            plt.xlabel('step', fontsize=16)
            plt.ylabel('loss', fontsize=16)
            plt.legend(loc='upper right')
            plt.savefig("hist_pre.png")
            plt.close()
            print("%.4e sec took 100steps" % (time.time() - start))
            start = time.time()
        if p % 5000 == 0 and p != 0:
            saver.save(sess, os.path.join(SAVEPRE_DIR, "model.ckpt"), p)

    # ---- Phase 2: discriminator warm-up (disabled: range(0)) ----
    print("start Discriminator")
    for d in range(0):
        batch_images_x, batch_images_t = batchgen.getBatch(bs)
        tmp, dis_loss = sess.run([
            d_opt,
            d_loss,
        ],
                                 feed_dict={
                                     x: batch_images_x,
                                     t: batch_images_t,
                                     lr: 1e-4,
                                 })
        print("in step %s, dis_loss = %.4e" % (d, dis_loss))

    # ---- Phase 3: adversarial training (2 generator : 1 discriminator) ----
    print("start GAN")
    for i in range(100001):
        batch_images_x, batch_images_t = batchgen.getBatch(bs)
        tmp, gen_loss, L1, adv, vgg, = sess.run(
            [g_opt, g_loss, L1_loss, g_loss_fake, vgg_loss],
            feed_dict={
                x: batch_images_x,
                t: batch_images_t,
                lr: trans_lr,
            })
        batch_images_x, batch_images_t = batchgen.getBatch(bs)
        tmp, dis_loss = sess.run([
            d_opt,
            d_loss,
        ],
                                 feed_dict={
                                     x: batch_images_x,
                                     t: batch_images_t,
                                     lr: trans_lr,
                                 })
        batch_images_x, batch_images_t = batchgen.getBatch(bs)
        tmp, gen_loss, L1, adv, vgg, = sess.run(
            [g_opt, g_loss, L1_loss, g_loss_fake, vgg_loss],
            feed_dict={
                x: batch_images_x,
                t: batch_images_t,
                lr: trans_lr,
            })
        # Exponential learning-rate decay down to a 1e-5 floor.
        if trans_lr > 1e-5:
            trans_lr = trans_lr * 0.99998
        print("in step %s, dis_loss = %.4e, gen_loss = %.4e" %
              (i, dis_loss, gen_loss))
        print("L1_loss=%.4e, adv_loss=%.4e, vgg_loss=%.4e" % (L1, adv, vgg))
        hist_g.append(gen_loss)
        hist_d.append(dis_loss)
        if i % 100 == 0:
            # Periodic preview strip + generator/discriminator loss curves.
            batch_images_x, batch_images_t = batchgen.getBatch(bs)
            out = sess.run(test_y, feed_dict={x: batch_images_x})
            X_ = tileImage(batch_images_x[:4])
            Y_ = tileImage(out[:4])
            Z_ = tileImage(batch_images_t[:4])
            X_ = (X_ + 1) * 127.5
            X_ = cv2.resize(X_, (img_size * 4 * 2, img_size * 4 * 2),
                            interpolation=cv2.INTER_CUBIC)
            Y_ = (Y_ + 1) * 127.5
            Z_ = (Z_ + 1) * 127.5
            Z_ = np.concatenate((X_, Y_, Z_), axis=1)
            cv2.imwrite("{}/{}.png".format(SAVEIM_DIR, i), Z_)
            fig = plt.figure(figsize=(8, 6), dpi=128)
            ax = fig.add_subplot(111)
            plt.title("Loss")
            plt.grid(which="both")
            plt.yscale("log")
            ax.plot(hist_g, label="gen_loss", linewidth=0.25)
            ax.plot(hist_d, label="dis_loss", linewidth=0.25)
            plt.xlabel('step', fontsize=16)
            plt.ylabel('loss', fontsize=16)
            plt.legend(loc='upper right')
            plt.savefig("hist.png")
            plt.close()
            print("%.4f sec took per 100steps, lr = %.4e" %
                  (time.time() - start, trans_lr))
            start = time.time()
        if i % 5000 == 0 and i != 0:
            saver.save(sess, os.path.join(SAVE_DIR, "model.ckpt"), i)
def main():
    """Train an image classifier on the flowers dataset with PaddlePaddle v1.

    Picks the architecture from the command line, builds a Momentum/SGD
    trainer with discrete-exponential LR decay, and saves gzipped parameter
    tars at the end of every pass.

    NOTE(review): uses the legacy Python 2 `print` statement and the
    PaddlePaddle v1 API; this function cannot run under Python 3 as-is.
    """
    # parse the argument
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'model',
        help='The model for image classification',
        choices=['alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet'])
    args = parser.parse_args()

    # PaddlePaddle init
    paddle.init(use_gpu=True, trainer_count=1)

    # Dense image vector input and integer class label.
    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(DATA_DIM))
    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(CLASS_DIM))

    extra_layers = None
    learning_rate = 0.01
    if args.model == 'alexnet':
        out = alexnet.alexnet(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg13':
        out = vgg.vgg13(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg16':
        out = vgg.vgg16(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg19':
        out = vgg.vgg19(image, class_dim=CLASS_DIM)
    elif args.model == 'resnet':
        out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM)
        learning_rate = 0.1
    elif args.model == 'googlenet':
        # GoogLeNet trains with two weighted auxiliary cross-entropy heads.
        out, out1, out2 = googlenet.googlenet(image, class_dim=CLASS_DIM)
        loss1 = paddle.layer.cross_entropy_cost(
            input=out1, label=lbl, coeff=0.3)
        paddle.evaluator.classification_error(input=out1, label=lbl)
        loss2 = paddle.layer.cross_entropy_cost(
            input=out2, label=lbl, coeff=0.3)
        paddle.evaluator.classification_error(input=out2, label=lbl)
        extra_layers = [loss1, loss2]

    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)

    # Create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
                                                         BATCH_SIZE),
        learning_rate=learning_rate / BATCH_SIZE,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=128000 * 35,
        learning_rate_schedule="discexp", )

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            flowers.train(),
            # To use other data, replace the above line with:
            # reader.train_reader('train.list'),
            buf_size=1000),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        flowers.valid(),
        # To use other data, replace the above line with:
        # reader.test_reader('val.list'),
        batch_size=BATCH_SIZE)

    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 1 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
        if isinstance(event, paddle.event.EndPass):
            # Snapshot parameters, then evaluate on the validation reader.
            with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
                parameters.to_tar(f)
            result = trainer.test(reader=test_reader)
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    # Create trainer
    trainer = paddle.trainer.SGD(
        cost=cost,
        parameters=parameters,
        update_equation=optimizer,
        extra_layers=extra_layers)

    trainer.train(
        reader=train_reader, num_passes=200, event_handler=event_handler)
def test_vgg19_save_weights():
    """Export the pretrained VGG19 (truncated at conv4_1) weights to disk."""
    from vgg import vgg19
    MODEL_SAVE_PATH = './trained_models/'
    encoder = vgg19(pretrained=True, end_with='conv4_1', name='content')
    encoder.save_weights(MODEL_SAVE_PATH + 'predefined_vgg19_endwith(conv4_1)_weights.h5')
def train(args):
    """Train a fast-neural-style transform network against a frozen VGG19.

    Perceptual (content) loss is taken at relu3_3; style loss compares Gram
    matrices of the style image's features at every tapped layer. Saves
    periodic checkpoints and the final model under the configured dirs.
    """
    # Select GPU or CPU
    device = torch.device("cuda" if args.cuda else "cpu")
    # Seed RNGs for reproducibility
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # Data loading and preprocessing; pixel values are scaled to [0, 255]
    transform = transforms.Compose([transforms.Resize(args.image_size),
                                    transforms.CenterCrop(args.image_size),
                                    transforms.ToTensor(),
                                    transforms.Lambda(lambda x: x.mul(255))])
    dataSet = datasets.ImageFolder(args.dataset, transform)
    data = DataLoader(dataSet, batch_size=args.batch_size)
    # Initialize the trainable transform network
    transformer = transformNet().to(device)
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()
    # Frozen pretrained VGG19 used as the perceptual-loss network
    vgg = vgg19(requires_grad=False).to(device)
    styleTransform = transforms.Compose([transforms.ToTensor(),
                                         transforms.Lambda(lambda x: x.mul(255))])
    style = loadImage(args.style_image, size=args.style_size)
    style = styleTransform(style)
    # Repeat the style image across the batch, then precompute its Gram
    # matrices once — they are constant during training.
    style = style.repeat(args.batch_size, 1, 1, 1).to(device)
    features_style = vgg(normalizeBatch(style))
    gram_style = [gram(y) for y in features_style]
    # Training loop
    for epoch in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batchId, (x, _) in enumerate(data):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()
            # Move the batch to GPU or CPU
            x = x.to(device)
            y = transformer(x)
            # Normalize both generated and input batches
            y = normalizeBatch(y)
            x = normalizeBatch(x)
            # Extract VGG features
            features_y = vgg(y)
            features_x = vgg(x)
            # Content loss at the relu3_3 tap
            content_loss = args.content_weight * mse_loss(
                features_y.relu3_3, features_x.relu3_3)
            # Style loss: Gram-matrix MSE summed over all tapped layers
            # (slice gm_s in case the last batch is smaller than batch_size)
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = gram(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
            style_loss *= args.style_weight
            # Total loss
            total_loss = content_loss + style_loss
            # Backpropagate
            total_loss.backward()
            # Update the model
            optimizer.step()
            # Accumulate aggregate losses for logging
            agg_content_loss += content_loss.item()
            agg_style_loss += style_loss.item()
            # Periodic logging
            if (batchId + 1) % args.log_interval == 0:
                msg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {}\tstyle: {}\ttotal: {}".format(time.ctime(), epoch + 1, count, len(dataSet), agg_content_loss / (batchId + 1), agg_style_loss / (batchId + 1), (agg_content_loss + agg_style_loss) / (batchId + 1))
                print(msg)
            # Periodic checkpointing (model moved to CPU for saving, then back)
            if args.checkpoint_model_dir is not None and (batchId + 1) % args.checkpoint_interval == 0:
                transformer.eval().cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(epoch) + "_batch_id_" + str(batchId + 1) + ".pth"
                ckpt_model_path = args.checkpoint_model_dir + '/' + args.save_model_name
                ckpt_model_path += '/' + ckpt_model_filename
                torch.save(transformer.state_dict(), ckpt_model_path)
                transformer.to(device).train()
    # Save the final model
    transformer.eval().cpu()
    save_model_path = args.save_model_dir + '/' + args.save_model_name + '.pth'
    torch.save(transformer.state_dict(), save_model_path)
    print("model saved at", save_model_path)
def main():
    """Build, optionally warm-start, and train an image classifier with
    PaddlePaddle v1 (Python 2 API).

    Relies on module-level names: DATA_DIM, CLASS_DIM, BATCH_SIZE, reader,
    loss_scalar, acc_scalar, and globals ``step``/``start`` used by the
    event handler.
    """
    # parse the argument
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--model',
                        help='The model for image classification',
                        choices=[
                            'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet',
                            'googlenet', 'inception-resnet-v2', 'inception_v4',
                            'xception'
                        ])
    parser.add_argument(
        '-r', '--retrain_file', type=str, default='',
        help="The model file to retrain, none is for train from scratch")
    args = parser.parse_args()

    # PaddlePaddle init
    paddle.init(use_gpu=True, trainer_count=1)

    # Input layers: flattened dense image vector and integer class label.
    image = paddle.layer.data(name="image",
                              type=paddle.data_type.dense_vector(DATA_DIM))
    lbl = paddle.layer.data(name="label",
                            type=paddle.data_type.integer_value(CLASS_DIM))
    extra_layers = None
    learning_rate = 0.0001

    # Select the network topology; some branches override the learning rate
    # or (googlenet) add auxiliary-classifier losses as extra layers.
    if args.model == 'alexnet':
        out = alexnet.alexnet(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg13':
        out = vgg.vgg13(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg16':
        out = vgg.vgg16(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg19':
        out = vgg.vgg19(image, class_dim=CLASS_DIM)
    elif args.model == 'resnet':
        conv, pool, out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM)
        learning_rate = 0.1
    elif args.model == 'googlenet':
        out, out1, out2 = googlenet.googlenet(image, class_dim=CLASS_DIM)
        # Auxiliary heads contribute 0.3-weighted cross-entropy losses.
        loss1 = paddle.layer.cross_entropy_cost(input=out1, label=lbl, coeff=0.3)
        paddle.evaluator.classification_error(input=out1, label=lbl)
        loss2 = paddle.layer.cross_entropy_cost(input=out2, label=lbl, coeff=0.3)
        paddle.evaluator.classification_error(input=out2, label=lbl)
        extra_layers = [loss1, loss2]
    elif args.model == 'inception-resnet-v2':
        # This topology only supports 331x331 or 299x299 RGB inputs.
        assert DATA_DIM == 3 * 331 * 331 or DATA_DIM == 3 * 299 * 299
        out = inception_resnet_v2.inception_resnet_v2(image,
                                                      class_dim=CLASS_DIM,
                                                      dropout_rate=0.5,
                                                      data_dim=DATA_DIM)
    elif args.model == 'inception_v4':
        conv, pool, out = inception_v4.inception_v4(image, class_dim=CLASS_DIM)
    elif args.model == 'xception':
        out = xception.xception(image, class_dim=CLASS_DIM)

    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)
    for k, v in parameters.__param_conf__.items():
        print(" config key {0}\t\t\tval{1}".format(k, v))
    print("-" * 50)
    #print(parameters.__param_conf__[0])

    # Warm start: restore all parameters from a tar file except the final
    # fully-connected layer (so a new class count can be trained).
    if args.retrain_file is not None and '' != args.retrain_file:
        print("restore parameters from {0}".format(args.retrain_file))
        exclude_params = [
            param for param in parameters.names()
            if param.startswith('___fc_layer_0__')
        ]
        parameters.init_from_tar(gzip.open(args.retrain_file), exclude_params)

    # Create optimizer: SGD momentum with L2 regularization and a
    # step-decay ("discexp") learning-rate schedule.
    optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
                                                         BATCH_SIZE),
        learning_rate=learning_rate / BATCH_SIZE,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=128000 * 35,
        learning_rate_schedule="discexp", )

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            # flowers.train(),
            # To use other data, replace the above line with:
            reader.train_reader('valid_train0.lst'),
            buf_size=2048),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        # flowers.valid(),
        # To use other data, replace the above line with:
        reader.test_reader('valid_val.lst'),
        batch_size=BATCH_SIZE)

    # Create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer,
                                 extra_layers=extra_layers)

    # End batch and end pass event handler.
    # NOTE(review): the nesting below is reconstructed from a
    # whitespace-collapsed source; verify the position of the second
    # ``start = time.time()`` against the original file.
    def event_handler(event):
        global step
        global start
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 10 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    time.time() - start)
                start = time.time()
            # Record loss and accuracy curves (VisualDL-style scalars).
            loss_scalar.add_record(step, event.cost)
            acc_scalar.add_record(
                step, 1 - event.metrics['classification_error_evaluator'])
            start = time.time()
            step += 1
            # Periodic mid-pass checkpoint.
            if event.batch_id % 100 == 0:
                with gzip.open('params_pass_%d.tar.gz' % event.pass_id,
                               'w') as f:
                    trainer.save_parameter_to_tar(f)
        if isinstance(event, paddle.event.EndPass):
            # End-of-pass checkpoint plus evaluation on the test reader.
            with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
                trainer.save_parameter_to_tar(f)
            result = trainer.test(reader=test_reader)
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    trainer.train(reader=train_reader,
                  num_passes=200,
                  event_handler=event_handler)
def get_model(args):
    """Build the classification network selected by ``args.network``.

    Args:
        args: parsed CLI namespace; uses ``args.network`` (architecture
            name) and ``args.class_num`` (number of output classes).

    Returns:
        A ``torch.nn.Module`` for the requested architecture. ResNet
        variants have their stem convolution rebuilt for single-channel
        (grayscale) input.

    Raises:
        ValueError: if ``args.network`` names no known architecture
            (previously this fell through and crashed with NameError).
    """
    def _grayscale_stem(model):
        # torchvision ResNets expect 3-channel input; rebuild conv1 for
        # 1-channel images while keeping every other hyperparameter.
        old = model.conv1
        model.conv1 = torch.nn.Conv2d(
            in_channels=1,
            out_channels=old.out_channels,
            kernel_size=old.kernel_size,
            stride=old.stride,
            padding=old.padding,
            # FIX: Conv2d's ``bias`` argument is a bool flag; the original
            # passed the Parameter/None object itself, which only worked by
            # accident of truthiness (ResNet conv1 has bias=False -> None).
            bias=old.bias is not None)
        return model

    network = args.network
    num_classes = args.class_num

    # Dispatch tables replace the original 19-branch elif chain; each
    # family shares identical construction logic.
    vgg_factories = {
        'vgg11': vgg.vgg11, 'vgg13': vgg.vgg13,
        'vgg16': vgg.vgg16, 'vgg19': vgg.vgg19,
        'vgg11_bn': vgg.vgg11_bn, 'vgg13_bn': vgg.vgg13_bn,
        'vgg16_bn': vgg.vgg16_bn, 'vgg19_bn': vgg.vgg19_bn,
    }
    resnet_factories = {
        'resnet18': models.resnet18, 'resnet34': models.resnet34,
        'resnet50': models.resnet50, 'resnet101': models.resnet101,
        'resnet152': models.resnet152,
    }
    densenet_factories = {
        'densenet121': densenet.densenet121, 'densenet169': densenet.densenet169,
        'densenet161': densenet.densenet161, 'densenet201': densenet.densenet201,
    }

    if network in vgg_factories:
        return vgg_factories[network](num_classes=num_classes)
    if network in resnet_factories:
        return _grayscale_stem(resnet_factories[network](num_classes=num_classes))
    if network in densenet_factories:
        return densenet_factories[network](num_classes=num_classes)
    raise ValueError("unknown network: %r" % network)
def main():
    """Train a feed-forward style-transfer network against a fixed VGG-19.

    Parses CLI options, loads the style/content images and an LSUN data
    loader, then optimizes a TransformNet so its output matches the content
    image's VGG features and the style image's Gram matrices.
    Side effects: writes sample images and checkpoints under the cwd and
    ./saved_model/.  Requires CUDA.
    """
    parser = argparse.ArgumentParser(
        description='Pytorch implementation of Neural Artistic Style Transfer')
    parser.add_argument('--w_content', default=80.0, type=float,
                        help='Weight for content loss')
    parser.add_argument('--w_style', default=1.0, type=float,
                        help='Weight for style loss')
    parser.add_argument('--img_content', default='content.jpg',
                        help='Image name for content')
    parser.add_argument('--img_style', default='style.jpg',
                        help='Image name for style')
    parser.add_argument('--iteration', '-i', default=50, type=int,
                        help='Total iteration')
    # FIX: was type=int — the learning rate is fractional (default 0.001),
    # so any value supplied on the command line was truncated to an integer.
    parser.add_argument('--learning_rate', '-lr', default=0.001, type=float,
                        help='Learning Rate')
    parser.add_argument('--batch_size', '-bs', default=1, type=int,
                        help='Batch size')
    parser.add_argument('--image_size', '-is', default=256, type=int,
                        help='Image size')
    args = parser.parse_args()

    ### Setting parameters ###
    w_content = args.w_content
    w_style = args.w_style
    iteration = args.iteration
    lr = args.learning_rate
    batch_s = args.batch_size
    image_s = args.image_size

    ### Load Model ###
    # Frozen VGG-19 for perceptual losses; TransformNet is what we train.
    vggnet = vgg.vgg19(pretrained=True).cuda().eval()
    resnet = T.TransformNet().cuda().train()

    ### Load Images ###
    image_style, image_content = N.image_loader(args.img_style,
                                                args.img_content,
                                                batch_s, image_s)
    #image_modify = image_content.clone()
    #image_modify.requires_grad = True
    train_loader, _, _ = misc.load_lsun(batch_s, image_s)

    ### Iteration ###
    optimi = optim.Adam(resnet.parameters(), lr=lr)
    print('entering epoch')
    for epoch in range(iteration):
        for batch_idx, batch_data in enumerate(train_loader):
            optimi.zero_grad()
            batch_img, _ = batch_data
            batch_img = batch_img.cuda()
            image_resnet = resnet(batch_img)
            # Build a loss-instrumented VGG for this batch/style pair and
            # run the stylized output through it to populate the losses.
            net_m, content_losses, style_losses = N.get_layer_out(
                vggnet, batch_img, image_style)
            net_m(image_resnet)
            content_loss_sum = 0.0
            style_loss_sum = 0.0
            for c in content_losses:
                content_loss_sum += c.loss
            for s in style_losses:
                style_loss_sum += s.loss
            loss = style_loss_sum * w_style + content_loss_sum * w_content
            loss.backward()
            # (was wrapped in a dead ``if True:`` — always logs)
            print('epoch: {}, batch: {}, loss: {} / {} / {}'.format(
                epoch, batch_idx, loss.data, style_loss_sum.data * w_style,
                content_loss_sum.data * w_content))
            optimi.step()
            # Periodically dump training output, ground truth, and a fixed
            # test stylization for visual inspection.
            if batch_idx % 100 == 0:
                utils.save_image(
                    torch.squeeze(image_resnet[0]),
                    'output_train_e{}b{}.jpg'.format(epoch, batch_idx))
                utils.save_image(
                    torch.squeeze(batch_img[0]),
                    'output_train_gt_e{}b{}.jpg'.format(epoch, batch_idx))
                image_test = resnet(image_content)
                utils.save_image(
                    torch.squeeze(image_test[0]),
                    'output_test_e{}b{}.jpg'.format(epoch, batch_idx))
                print(torch.max(batch_img), torch.max(image_test),
                      torch.max(image_content))
                print(torch.min(batch_img), torch.min(image_test),
                      torch.min(image_content))
            # Periodic checkpoint; ./saved_model/ must already exist.
            if batch_idx % 5000 == 0:
                torch.save(
                    resnet.state_dict(),
                    './saved_model/model_e{}b{}.pt'.format(epoch, batch_idx))
import torchvision import torchvision.transforms as transforms import argparse import vgg transform_train =transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]) transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) vggnet=vgg.vgg19() criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(vggnet.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) trainset = torchvision.datasets.CIFAR10(root='data', train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True) testset = torchvision.datasets.CIFAR10(root='data', train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True) if __name__ == "__main__": for epoch in range(10): print('\nEpoch: %d' % (epoch + 1)) vggnet.train() sum_loss = 0.0 correct = 0.0
def train(args):
    """Train a TransformerNet for style transfer with OneFlow.

    Iterates single images (batch size 1) from ``args.dataset``, computing
    a VGG perceptual content loss (relu2_2) plus a Gram-matrix style loss
    against ``args.style_image``.  Writes progress collages, periodic
    checkpoints, and a final model directory.  Requires CUDA.
    """
    device = "cuda"
    np.random.seed(args.seed)
    # load path of train images; skip any .txt metadata files
    train_images = os.listdir(args.dataset)
    train_images = [
        image for image in train_images if not image.endswith("txt")
    ]
    random.shuffle(train_images)
    images_num = len(train_images)
    print("dataset size: %d" % images_num)

    # Initialize transforemer net, optimizer, and loss function
    transformer = TransformerNet().to("cuda")
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = flow.nn.MSELoss()

    # Optionally resume from an existing checkpoint directory.
    if args.load_checkpoint_dir is not None:
        state_dict = flow.load(args.load_checkpoint_dir)
        transformer.load_state_dict(state_dict)
        print("successfully load checkpoint from " + args.load_checkpoint_dir)

    # load pretrained vgg16 (or vgg19) and wrap it as a frozen extractor
    if args.vgg == "vgg19":
        vgg = vgg19(pretrained=True)
    else:
        vgg = vgg16(pretrained=True)
    vgg = VGG_WITH_FEATURES(vgg.features, requires_grad=False)
    vgg.to("cuda")

    style_image = utils.load_image(args.style_image)
    style_image_recover = recover_image(style_image)
    # Style targets (Gram matrices) are computed once, outside the loop.
    features_style = vgg(
        utils.normalize_batch(flow.Tensor(style_image).to("cuda")))
    gram_style = [utils.gram_matrix(y) for y in features_style]

    for e in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.0
        agg_style_loss = 0.0
        count = 0
        for i in range(images_num):
            image = load_image("%s/%s" % (args.dataset, train_images[i]))
            # This pipeline processes one image at a time.
            n_batch = 1
            count += n_batch
            x_gpu = flow.tensor(image, requires_grad=True).to("cuda")
            y_origin = transformer(x_gpu)
            # Normalize both input and stylized output before VGG.
            x_gpu = utils.normalize_batch(x_gpu)
            y = utils.normalize_batch(y_origin)
            features_x = vgg(x_gpu)
            features_y = vgg(y)
            # Content loss on relu2_2 activations.
            content_loss = args.content_weight * mse_loss(
                features_y.relu2_2, features_x.relu2_2)
            # Style loss: Gram-matrix MSE summed over all tapped layers.
            style_loss = 0.0
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = utils.gram_matrix(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
            style_loss *= args.style_weight
            total_loss = content_loss + style_loss
            total_loss.backward()
            # step before zero_grad: gradients are consumed, then cleared
            # for the next iteration.
            optimizer.step()
            optimizer.zero_grad()
            agg_content_loss += content_loss.numpy()
            agg_style_loss += style_loss.numpy()
            if (i + 1) % args.log_interval == 0:
                # Optionally save a [style | input | output] collage.
                if args.style_log_dir is not None:
                    y_recover = recover_image(y_origin.numpy())
                    image_recover = recover_image(image)
                    result = np.concatenate(
                        (style_image_recover, image_recover), axis=1)
                    result = np.concatenate((result, y_recover), axis=1)
                    cv2.imwrite(args.style_log_dir + str(i + 1) + ".jpg",
                                result)
                    print(args.style_log_dir + str(i + 1) + ".jpg" + " saved")
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                    time.ctime(),
                    e + 1,
                    count,
                    images_num,
                    agg_content_loss / (i + 1),
                    agg_style_loss / (i + 1),
                    (agg_content_loss + agg_style_loss) / (i + 1),
                )
                print(mesg)
            # Periodic checkpoint; switch to eval() for saving, then back.
            if (args.checkpoint_model_dir is not None
                    and (i + 1) % args.checkpoint_interval == 0):
                transformer.eval()
                ckpt_model_filename = ("CW_" + str(int(args.content_weight)) +
                                       "_lr_" + str(args.lr) + "ckpt_epoch" +
                                       str(e) + "_" + str(i + 1))
                ckpt_model_path = os.path.join(args.checkpoint_model_dir,
                                               ckpt_model_filename)
                flow.save(transformer.state_dict(), ckpt_model_path)
                transformer.train()

    # save model
    transformer.eval()
    save_model_filename = ("CW_" + str(args.content_weight) + "_lr_" +
                           str(args.lr) + "sketch_epoch_" + str(args.epochs) +
                           "_" + str(time.ctime()).replace(" ", "_") + "_" +
                           str(args.content_weight) + "_" +
                           str(args.style_weight))
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    flow.save(transformer.state_dict(), save_model_path)
    print("\nDone, trained model saved at", save_model_path)
import datetime import threading import torch from vgg import vgg19 from torchvision import transforms from PIL import Image import requests import json trans = transforms.Compose([ transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) model = vgg19() model.load_state_dict( torch.load('best_model.pth', map_location=torch.device('cpu'))) url = 'https://5gvr.komect.com/edu/stu-flow' #"http://117.139.13.88:18089/scenic/bsdFlow" rtmp_str = 'rtmp://47.106.60.245:1936/live/620271123-1?eyJrZXkiOjAsInNpZ24iOiJFZ2FSYjhpdC1mMW5EbHJxb19WQWdmVEYza3FUQmZmZy1kOW53SGJrYVA2V3E5T1hCMFdmd0NXc1hscktsX0hwTnpnU2NtNTNKSWRCemZQdWN3RGtsbHdtQVUzWk9zU0t5NG1OekdGNnplUXJsRVoxSGktMTJFMnNNZUpDUUg1dGVvamdNY3NKdlJ1b1ZRT1cyaF9OSWdPUUt6YTlwREFzVEU1Q0tTOGlkTWMifQ' def queue_img_put(q, name, pwd, ip, channel=1): cap = cv2.VideoCapture(rtmp_str) while True: is_opened, frame = cap.read() q.put(frame) if is_opened else None q.get() if q.qsize() > 1 else None # if is_opened: # q.put(frame) # else:
def __init__(self, model_file, gradient=False):
    """Expose a pretrained VGG-19 feature stack as individually named layers.

    Args:
        model_file: weight file handed to the ``vgg19`` loader.
        gradient: when False (default), freeze every parameter so the
            module acts purely as a fixed feature extractor.
    """
    super(VGG19, self).__init__()
    features = vgg19(pretrained=True, model_file=model_file).features

    # vgg19().features is a flat Sequential of 37 modules: five hierarchies
    # of (Conv2d, ReLU) pairs separated by MaxPool2d layers.  Instead of 37
    # hand-written assignments, generate the canonical attribute names
    # (conv1_1, relu1_1, ..., pool4, conv5_1, ..., relu5_4, pool5) in the
    # exact sequential order of the feature indices and bind each module.
    hierarchy = [(1, 2), (2, 2), (3, 4), (4, 4), (5, 4)]  # (level, conv count)
    layer_names = []
    for level, conv_count in hierarchy:
        if level > 1:
            # Each hierarchy after the first is preceded by a pooling layer.
            layer_names.append("pool%d" % (level - 1))
        for j in range(1, conv_count + 1):
            layer_names.append("conv%d_%d" % (level, j))
            layer_names.append("relu%d_%d" % (level, j))
    layer_names.append("pool5")

    # setattr on an nn.Module registers submodules exactly like direct
    # attribute assignment, and in the same (insertion) order.
    for index, attr_name in enumerate(layer_names):
        setattr(self, attr_name, features[index])

    # don't need the gradients, just want the features
    if not gradient:
        for param in self.parameters():
            param.requires_grad = False

    self.pad = nn.ReflectionPad2d(padding=1)
self.G = self.gram.forward(input) self.G.mul_(self.strength) self.loss = self.criterion.forward(self.G, self.target) self.gram_record = self.gram.record return self.output def backward(self, retain_variables=True): """compute backward pass""" self.loss.backward(retain_variables=retain_variables) return self.loss.data[0] ####### load model #cnn = models.alexnet(pretrained=True).features.cuda() # Alexnet has 5 Conv2d layers vgg_19 = vgg19(pretrained=True).features.cuda() # VGG19 has 16 Conv2d layers cnn = vgg_19 # desired depth layers to compute style/content losses : content_layers = ['conv_10', 'conv_13', 'conv_15'] style_layers = ['conv_1', 'conv_3', 'conv_5', 'conv_7'] #------------ """make StyleTransferNet class""" #------------ 4 class StyleTransferNet(nn.Module): """a network for neural style transfer"""
def main(): # parse the argument parser = argparse.ArgumentParser() parser.add_argument( 'model', help='The model for image classification', choices=['alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet']) args = parser.parse_args() # PaddlePaddle init paddle.init(use_gpu=True, trainer_count=7) image = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(DATA_DIM)) lbl = paddle.layer.data( name="label", type=paddle.data_type.integer_value(CLASS_DIM)) extra_layers = None learning_rate = 0.01 if args.model == 'alexnet': out = alexnet.alexnet(image, class_dim=CLASS_DIM) elif args.model == 'vgg13': out = vgg.vgg13(image, class_dim=CLASS_DIM) elif args.model == 'vgg16': out = vgg.vgg16(image, class_dim=CLASS_DIM) elif args.model == 'vgg19': out = vgg.vgg19(image, class_dim=CLASS_DIM) elif args.model == 'resnet': out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM) learning_rate = 0.1 elif args.model == 'googlenet': out, out1, out2 = googlenet.googlenet(image, class_dim=CLASS_DIM) loss1 = paddle.layer.cross_entropy_cost( input=out1, label=lbl, coeff=0.3) paddle.evaluator.classification_error(input=out1, label=lbl) loss2 = paddle.layer.cross_entropy_cost( input=out2, label=lbl, coeff=0.3) paddle.evaluator.classification_error(input=out2, label=lbl) extra_layers = [loss1, loss2] cost = paddle.layer.classification_cost(input=out, label=lbl) # Create parameters parameters = paddle.parameters.create(cost) # Create optimizer optimizer = paddle.optimizer.Momentum( momentum=0.9, regularization=paddle.optimizer.L2Regularization(rate=0.0005 * BATCH_SIZE), learning_rate=learning_rate / BATCH_SIZE, learning_rate_decay_a=0.1, learning_rate_decay_b=128000 * 35, learning_rate_schedule="discexp", ) train_reader = paddle.batch( paddle.reader.shuffle( flowers.train(), # To use other data, replace the above line with: # reader.train_reader('train.list'), buf_size=1000), batch_size=BATCH_SIZE) test_reader = paddle.batch( flowers.valid(), # To use other data, replace 
the above line with: # reader.test_reader('val.list'), batch_size=BATCH_SIZE) # Create trainer trainer = paddle.trainer.SGD( cost=cost, parameters=parameters, update_equation=optimizer, extra_layers=extra_layers) # End batch and end pass event handler def event_handler(event): if isinstance(event, paddle.event.EndIteration): if event.batch_id % 1 == 0: print "\nPass %d, Batch %d, Cost %f, %s" % ( event.pass_id, event.batch_id, event.cost, event.metrics) if isinstance(event, paddle.event.EndPass): with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f: trainer.save_parameter_to_tar(f) result = trainer.test(reader=test_reader) print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) trainer.train( reader=train_reader, num_passes=200, event_handler=event_handler)