def train(**kwargs):
    cfg = Config()
    for k, v in kwargs.items():
        setattr(cfg, k, v)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))
    ])

    # load the content and style images; the style image is resized to
    # match the content image
    content = load_image(cfg.content, transform, max_size=cfg.max_size)
    style = load_image(cfg.style, transform,
                       shape=[content.size(3), content.size(2)])

    # the target image is initialized from the content image and optimized directly
    target = content.clone().requires_grad_(True)
    optimizer = torch.optim.Adam([target], lr=cfg.lr, betas=(0.5, 0.999))

    vgg = VGGNet()
    if cfg.use_gpu:
        vgg.cuda()

    for step in range(cfg.total_step):
        target_features = vgg(target)
        content_features = vgg(content)
        style_features = vgg(style)

        style_loss = 0
        content_loss = 0
        for f1, f2, f3 in zip(target_features, content_features,
                              style_features):
            # content loss: feature reconstruction error
            content_loss += torch.mean((f1 - f2) ** 2)

            _, c, h, w = f1.size()
            f1 = f1.view(c, h * w)
            f3 = f3.view(c, h * w)

            # style loss: difference between Gram matrices
            f1 = torch.mm(f1, f1.t())
            f3 = torch.mm(f3, f3.t())
            style_loss += torch.mean((f1 - f3) ** 2) / (c * h * w)

        loss = content_loss + cfg.style_weight * style_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (step + 1) % cfg.log_step == 0:
            print('Step [%d/%d], Content Loss: %.4f, Style Loss: %.4f'
                  % (step + 1, cfg.total_step,
                     content_loss.item(), style_loss.item()))

        if (step + 1) % cfg.sample_step == 0:
            # undo the ImageNet normalization before saving
            denorm = transforms.Normalize((-2.12, -2.04, -1.80),
                                          (4.37, 4.46, 4.44))
            img = target.clone().cpu().squeeze()
            img = denorm(img.detach()).clamp_(0, 1)
            torchvision.utils.save_image(img, 'output-%d.png' % (step + 1))
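# `load_image` above is imported from elsewhere in the project. A minimal
# sketch of what it plausibly does, assuming it only resizes, applies the
# transform, and adds a batch dimension (an assumption, not the project's
# actual helper):
from PIL import Image
import torch

def load_image(image_path, transform=None, max_size=None, shape=None):
    """Load an image, optionally resize it, and return a 4-D float tensor."""
    image = Image.open(image_path)
    if max_size is not None:
        # scale the longer edge down to max_size, keeping the aspect ratio
        scale = max_size / max(image.size)
        size = (int(image.size[0] * scale), int(image.size[1] * scale))
        image = image.resize(size, Image.LANCZOS)
    if shape is not None:
        # PIL expects (width, height)
        image = image.resize(shape, Image.LANCZOS)
    if transform is not None:
        image = transform(image).unsqueeze(0)
    return image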
def train():
    db = './public/cards/trainImg'
    img_list = get_imlist(db, '.jpg')

    model = VGGNet()
    feats = []
    names = []
    for i, img_path in enumerate(img_list):
        # extract a feature vector for each image
        norm_feat = model.extract_feat(img_path)
        img_name = os.path.split(img_path)[1]
        feats.append(norm_feat)
        names.append(img_name)
        print("extracting feature from image No. %d , %d images in total"
              % (i + 1, len(img_list)))

    feats = np.array(feats)

    # store features and file names in a single HDF5 file
    output = './src/vis/model/model.h5'
    h5f = h5py.File(output, 'w')
    h5f.create_dataset('dataset_1', data=feats)
    h5f.create_dataset('dataset_2', data=np.string_(names))
    h5f.close()
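# `get_imlist` is imported from a utility module that is not shown; a
# plausible sketch, assuming it simply lists files with a given suffix
# (the real helper may differ):
import os

def get_imlist(path, suffix='.jpg'):
    """Return full paths of all files under `path` that end with `suffix`."""
    return [os.path.join(path, f)
            for f in sorted(os.listdir(path))
            if f.endswith(suffix)]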
def predict(img_path, weights_path, class_indices_path):
    """
    Classify a single image.
    :param img_path: path of the image to classify
    :param weights_path: path of the model weights
    :param class_indices_path: path of the label/class-index file
    :return: predicted class of the image
    """
    img_height = img_width = 224

    # load and resize the image
    img = Image.open(img_path)
    img = img.resize((img_width, img_height))
    plt.imshow(img)

    # normalize to [0, 1] and add a batch dimension
    img = np.array(img) / 255.
    img = np.expand_dims(img, 0)

    # load the class-index file
    try:
        with open(class_indices_path, 'r') as json_file:
            class_indict = json.load(json_file)
    except Exception as e:
        print(e)
        exit(-1)

    # predict
    model = VGGNet(img_height, img_width, class_num=5, name='vgg11').vgg()
    model.load_weights(weights_path)
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)
    label = '{} {:.4f}'.format(class_indict[str(predict_class)],
                               result[predict_class])
    plt.title(label)
    plt.show()
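# A hypothetical invocation of `predict`; the image path below is a
# placeholder, while the weights and class-index paths mirror the files
# written by the training script further down:
if __name__ == '__main__':
    predict(img_path='../test_images/sample.jpg',
            weights_path='../save_weights/vgg11.h5',
            class_indices_path='../class_indices.json')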
from PIL import ImageEnhance
import paddle.fluid as fluid
from multiprocessing import cpu_count
import matplotlib.pyplot as plt
from data_processor import train_parameters
from data_processor import *
from model import VGGNet

'''
Model training
'''
# with fluid.dygraph.guard(place=fluid.CUDAPlace(0)):
with fluid.dygraph.guard():
    print(train_parameters['class_dim'])
    print(train_parameters['label_dict'])
    vgg = VGGNet()
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=train_parameters['learning_strategy']['lr'],
        parameter_list=vgg.parameters())
    for epoch_num in range(train_parameters['num_epochs']):
        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data]).astype('int64')
            y_data = y_data[:, np.newaxis]

            # convert NumPy arrays to DyGraph variables
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)

            out, acc = vgg(img, label)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
import torch
from torchvision import datasets, transforms
from model import VGGNet

batch_size = 32
train_transforms = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

test_dir = './Atestdataset'
test_datasets = datasets.ImageFolder(test_dir, transform=train_transforms)
test_dataloader = torch.utils.data.DataLoader(test_datasets,
                                              batch_size=batch_size,
                                              shuffle=True)

model = VGGNet()
model.load_state_dict(torch.load('./Vgg16.pth',
                                 map_location=torch.device('cpu')))
model.eval()

total = 0
correct = 0
with torch.no_grad():
    for i, data in enumerate(test_dataloader):
        images, labels = data
        vggoutputs = model(images)
        _, vggpredicted = torch.max(vggoutputs.data, 1)
        total += labels.size(0)
        correct += (vggpredicted == labels).sum().item()

print(100.0 * correct / total)
def run(model_name):
    train_dir = '../flower_data/train'
    validation_dir = '../flower_data/validation'

    if not os.path.exists('../save_weights'):
        os.mkdir('../save_weights')

    img_height, img_width = 224, 224
    batch_size = 32
    epochs = 10

    # prepare the training and validation generators
    train_image_generator = ImageDataGenerator(rescale=1. / 255,
                                               horizontal_flip=True)
    validation_image_generator = ImageDataGenerator(rescale=1. / 255)
    train_data_gen = train_image_generator.flow_from_directory(
        directory=train_dir, batch_size=batch_size, shuffle=True,
        target_size=(img_height, img_width), class_mode='categorical')
    valid_data_gen = validation_image_generator.flow_from_directory(
        directory=validation_dir, batch_size=batch_size, shuffle=False,
        target_size=(img_height, img_width), class_mode='categorical')

    # number of training and validation samples
    total_train, total_valid = train_data_gen.n, valid_data_gen.n

    # write the inverted label-index dictionary to JSON
    class_indices = train_data_gen.class_indices
    inverse_dict = dict((v, k) for k, v in class_indices.items())
    json_str = json.dumps(inverse_dict, indent=4)
    with open('../class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # build and train the model
    model = VGGNet(img_height, img_width, class_num=5, name=model_name).vgg()
    model.summary()
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    callbacks = [tf.keras.callbacks.ModelCheckpoint(
        filepath=f'../save_weights/{model_name}.h5',
        save_best_only=True, save_weights_only=True, monitor='val_loss')]
    history = model.fit(x=train_data_gen,
                        steps_per_epoch=total_train // batch_size,
                        epochs=epochs,
                        validation_data=valid_data_gen,
                        validation_steps=total_valid // batch_size,
                        callbacks=callbacks)

    # evaluate the model: plot loss and accuracy curves
    history_dict = history.history
    train_loss = history_dict["loss"]
    train_accuracy = history_dict["accuracy"]
    val_loss = history_dict["val_loss"]
    val_accuracy = history_dict["val_accuracy"]

    # figure 1: loss
    plt.figure()
    plt.plot(range(epochs), train_loss, label='train_loss')
    plt.plot(range(epochs), val_loss, label='val_loss')
    plt.legend()
    plt.xlabel('epochs')
    plt.ylabel('loss')

    # figure 2: accuracy
    plt.figure()
    plt.plot(range(epochs), train_accuracy, label='train_accuracy')
    plt.plot(range(epochs), val_accuracy, label='val_accuracy')
    plt.legend()
    plt.xlabel('epochs')
    plt.ylabel('accuracy')
    plt.show()
def train(cfg):
    n_class = int(cfg["data"]["n_class"])
    img_h = int(cfg["data"]["img_h"])
    img_w = int(cfg["data"]["img_w"])
    batch_size = int(cfg["training"]["batch_size"])
    epochs = int(cfg["training"]["epochs"])
    lr = float(cfg["training"]["optimizer"]["lr"])
    momentum = float(cfg["training"]["optimizer"]["momentum"])
    w_decay = float(cfg["training"]["optimizer"]["weight_decay"])
    step_size = int(cfg["training"]["lr_schedule"]["step_size"])
    gamma = float(cfg["training"]["lr_schedule"]["gamma"])

    configs = ("FCNs-BCEWithLogits_batch{}_epoch{}_RMSprop_scheduler-step{}"
               "-gamma{}_lr{}_momentum{}_w_decay{}_input_size{}_03091842").format(
        batch_size, epochs, step_size, gamma, lr, momentum, w_decay, img_h)
    print("Configs:", configs)

    # resolve dataset file paths
    root_dir = cfg["data"]["root_dir"]
    train_file = os.path.join(root_dir, cfg["data"]["train_file"])
    print(train_file)
    val_file = os.path.join(root_dir, cfg["data"]["val_file"])
    mean_file = os.path.join(root_dir, cfg["data"]["mean_file"])
    class_weight_file = os.path.join(root_dir, cfg["data"]["class_weight_file"])

    model_dir = cfg["training"]["model_dir"]
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    model_path = os.path.join(model_dir, configs)

    use_gpu = torch.cuda.is_available()
    num_gpu = list(range(torch.cuda.device_count()))
    continue_train = False

    # MeanRGB_train = ComputeMeanofInput(train_file)
    # MeanRGB_train = np.load(mean_file)
    MeanRGB_train = np.array([0.0, 0.0, 0.0])
    print("MeanRGB_train: {}".format(MeanRGB_train))

    train_data = ScanNet2d(csv_file=train_file, phase='train',
                           trainsize=(img_h, img_w), MeanRGB=MeanRGB_train)
    val_data = ScanNet2d(csv_file=val_file, phase='val',
                         trainsize=(img_h, img_w), MeanRGB=MeanRGB_train)
    train_loader = DataLoader(train_data, batch_size=batch_size,
                              shuffle=True, num_workers=1)
    val_loader = DataLoader(val_data, batch_size=batch_size,
                            shuffle=False, num_workers=1)

    # class_weight = trainer.computer_class_weights(train_file)
    class_weight = np.load(class_weight_file)
    print("class_weight: {}".format(class_weight))
    class_weight = torch.from_numpy(class_weight)
    print("shape of class weight {}".format(class_weight.shape))

    vgg_model = VGGNet(requires_grad=True, remove_fc=True)
    fcn_model = FCN8s(encoder_net=vgg_model, n_class=n_class)

    if use_gpu:
        ts = time.time()
        vgg_model = vgg_model.cuda()
        fcn_model = fcn_model.cuda()
        fcn_model = nn.DataParallel(fcn_model, device_ids=num_gpu)
        class_weight = class_weight.cuda()
        print("Finish cuda loading, time elapsed {}".format(time.time() - ts))

    L = nn.BCEWithLogitsLoss(reduction='none')
    optimizer = optim.RMSprop(fcn_model.parameters(), lr=lr,
                              momentum=momentum, weight_decay=w_decay)
    # optimizer = optim.SGD(fcn_model.parameters(), lr=lr,
    #                       momentum=momentum, weight_decay=w_decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size,
                                    gamma=gamma)

    score_dir = os.path.join("scores", configs)
    if not os.path.exists(score_dir):
        os.makedirs(score_dir)
    log_headers = [
        'epoch',
        'train/loss', 'train/acc', 'train/acc_cls', 'train/mean_iu',
        'train/fwavacc',
        'val/loss', 'val/acc', 'val/acc_cls', 'val/mean_iu', 'val/fwavacc',
        'elapsed_time'
    ]
    if not os.path.exists(os.path.join(score_dir, 'log.csv')):
        with open(os.path.join(score_dir, 'log.csv'), 'w') as f:
            f.write(','.join(log_headers) + '\n')

    IU_scores = np.zeros((epochs, n_class + 1))
    pixel_scores = np.zeros(epochs)
    writer = SummaryWriter()
    # color_mapping = util.GenerateColorMapping(n_class)
    best_mean_iu = 0
    epoch_loss = 0.0
    if continue_train:
        model_path = "C:\\Users\\ji\\Documents\\FCN-VGG16\\models\\FCNs-BCEWithLogits_batch1_epoch500_RMSprop_scheduler-step50-gamma0.5_lr0.0001_momentum0.0_w_decay1e-05"
        fcn_model = torch.load(model_path)
        fcn_model.train()

    for epoch in range(epochs):
        fcn_model.train()
        scheduler.step()
        ts = time.time()
        running_loss = 0.0
        label_preds = []
        label_trues = []

        for i, batch in enumerate(train_loader):
            optimizer.zero_grad()

            if use_gpu:
                inputs = Variable(batch['X'].cuda())
                labels = Variable(batch['Y'].cuda())
            else:
                inputs, labels = Variable(batch['X']), Variable(batch['Y'])

            outputs = fcn_model(inputs)

            # per-pixel BCE loss, averaged per class, weighted, then reduced
            loss = L(outputs, labels)
            loss = loss.permute(0, 2, 3, 1).reshape(-1, n_class + 1)
            loss = torch.mean(loss, dim=0)
            loss = torch.mul(loss, class_weight)
            loss = torch.mean(loss)
            loss.backward()
            optimizer.step()

            # keep a few samples from the first batch for visualization
            if i == 0 and epoch == 0:
                visIn = inputs[:3]
                visLabel = batch['l'][:3]

            epoch_loss += loss.item()
            running_loss += loss.item()

            if i % 10 == 9 and i != 0:
                print("epoch{}, iter{}, Iterloss: {}".format(
                    epoch, i, running_loss / 10))
                writer.add_scalar('train/iter_loss', running_loss / 10,
                                  epoch * len(train_loader) + i)
                running_loss = 0.0

            # accumulate predictions and targets for the epoch metrics
            outputs = outputs.data.cpu().numpy()
            N, _, h, w = outputs.shape
            pred = outputs.transpose(0, 2, 3, 1).reshape(
                -1, n_class + 1).argmax(axis=1).reshape(N, h, w)
            target = batch['l'].cpu().numpy().reshape(N, h, w)
            for lt, lp in zip(target, pred):
                label_trues.append(lt)
                label_preds.append(lp)

        metrics = util.label_accuracy_score(label_trues, label_preds,
                                            n_class + 1)
        with open(os.path.join(score_dir, "log.csv"), 'a') as f:
            log = [epoch] + [epoch_loss] + list(metrics) + [''] * 7
            log = map(str, log)
            f.write(','.join(log) + '\n')

        writer.add_scalar('train/epoch_loss', epoch_loss, epoch)
        print("Finish epoch{}, epoch loss {}, time elapsed {}".format(
            epoch, epoch_loss, time.time() - ts))
        epoch_loss = 0.0

        writer.add_scalar('train/mean_iu', metrics[2], epoch)
        writer.add_scalar('train/acc', metrics[0], epoch)
        writer.add_scalar('train/acc_cls', metrics[1], epoch)

        # visualize training progress on the held-out samples
        visOut = fcn_model(visIn)
        preds_v, targets_v = util.visulaize_output(visOut, visLabel, n_class)
        writer.add_images('train/predictions', torch.from_numpy(preds_v),
                          global_step=epoch, dataformats='NHWC')
        writer.add_images('train/targets', torch.from_numpy(targets_v),
                          global_step=epoch, dataformats='NHWC')

        if not os.path.exists(model_path):
            os.makedirs(model_path)
        torch.save(fcn_model, os.path.join(model_path, str(epoch)))

        best_mean_iu = val_model(epoch, val_loader, fcn_model, use_gpu,
                                 n_class, IU_scores, pixel_scores, score_dir,
                                 writer, best_mean_iu, model_path, L)

    writer.flush()
    writer.close()
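# `train` expects a nested config dictionary; a minimal sketch of an entry
# point that loads one from a YAML file (the file name is an assumption):
import yaml

if __name__ == '__main__':
    with open('config.yaml') as f:
        cfg = yaml.safe_load(f)
    train(cfg)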
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from dataset import CityScapeDataset
from model import FCNs, VGGNet
from torchvision import transforms, utils
from torch import Tensor
from labels import *

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

batch_size, n_class, h, w = 1, 19, 480, 320

vgg_model = VGGNet(requires_grad=True)
fcn_model = FCNs(pretrained_net=vgg_model, n_class=n_class).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(fcn_model.parameters(), lr=1e-3, momentum=0.9)

preprocess = transforms.Compose([
    # transforms.Scale(256),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# NOTE: applying RandomHorizontalFlip independently to the image and the
# target means the two flips are not synchronized; a joint transform is
# needed for correct augmentation.
dataset = CityScapeDataset('.\\data', (h, w), transform=preprocess,
                           target_transform=transforms.RandomHorizontalFlip())
img, label = dataset[0]
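# The snippet stops after fetching a single sample; a minimal sketch of how
# the model, criterion, and optimizer above could be wired into one training
# step (the batch layout and label dtype of CityScapeDataset are assumptions):
loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                     shuffle=True)
fcn_model.train()
for img, label in loader:
    img = img.to(device)
    label = label.to(device).long()  # CrossEntropyLoss expects class indices
    optimizer.zero_grad()
    out = fcn_model(img)             # (N, n_class, h, w) logits
    loss = criterion(out, label)
    loss.backward()
    optimizer.step()
    break                            # one step only, for illustration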
import tensorflow as tf
from model import VGGNet
import numpy as np
from data_loader import DataLoader
import time

ckpt_path = '../ckpt/model_0.ckpt'

net = VGGNet([224, 224], 128, training=False)
net.build()

sess = tf.Session()
saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
# config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.7

if ckpt_path:
    saver.restore(sess, ckpt_path)

loader = DataLoader()
batch = 64
valid_batch_num = loader.valid_urls.shape[0] // batch
cou = 1

# for idx in range(valid_batch_num):
#     res = loader.get_valid_batch_data(batch)
#     feed_dicts = {net.inputs: res[0], net.ground_truth: res[1]}
#     # sess.run(optimizer, feed_dict=feed_dicts)
#     fc_16 = sess.run([net.fc_16], feed_dict=feed_dicts)
#     fc_16 = np.array(fc_16[0])
#     for i in range(batch):
#         if np.argmax(fc_16[i, :]) == np.argmax(res[1][i, :]):
import torch
import torch.nn as nn
import torch.optim as optim
from model import VGGNet, VGG_CONFS

MODEL_PATH = 'models/checkpoint_e49.pkl'
SAMPLE_IMG_PATH = ''

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cpt = torch.load(MODEL_PATH, map_location=device)
epoch = cpt['epoch']
seed = cpt['seed']
total_steps = cpt['total_steps']  # make even this the same...

vgg16 = nn.parallel.DataParallel(
    VGGNet(VGG_CONFS['vgg16'], dim=32, num_classes=10))
vgg16.load_state_dict(cpt['model'])
print(vgg16)

# test loading the optimizer; note that load_state_dict returns None, so
# the optimizer must be created first and its state restored afterwards
optimizer = optim.SGD(vgg16.parameters(), lr=0.0001,
                      weight_decay=0.00005, momentum=0.9)
optimizer.load_state_dict(cpt['optimizer'])
train_dataset = torchvision.datasets.SVHN(root='../data',
                                          split='train',
                                          transform=transforms.ToTensor(),
                                          download=True)
test_dataset = torchvision.datasets.SVHN(root='../data',
                                         split='test',
                                         transform=transforms.ToTensor(),
                                         download=True)

# Data loaders
train_data_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                batch_size=batch_size,
                                                shuffle=True)
test_data_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                               batch_size=batch_size,
                                               shuffle=False)

# select the network architecture from the command line
if sys.argv[1] == 'vgg':
    model = VGGNet()
elif sys.argv[1] == 'mobile':
    model = MobileNet()
elif sys.argv[1] == 'custom':
    model = CifarClassifier()
else:
    raise ValueError(f'Unknown network type {sys.argv[1]}')
model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()

for epoch in range(num_epochs):
    for i, (x, x_class) in enumerate(train_data_loader):
        # Forward pass
        x = x.cuda()  # .view(-1, img_size)
        class_logits = model(x)
                                   download=True,
                                   transform=transforms.Compose([
                                       transforms.RandomHorizontalFlip(),
                                       transforms.RandomResizedCrop(32, scale=(0.8, 1.0)),
                                       transforms.ToTensor(),
                                       normalize,
                                   ]))
print('Dataset created - size: {}'.format(len(dataset)))

seed = torch.initial_seed()
print('Using seed : {}'.format(seed))

# create model & train on multiple GPUs
vggnet = VGGNet(VGG_CONFS[MODEL_TYPE], dim=IMAGE_DIM,
                num_classes=NUM_CLASSES).to(device)
vggnet = torch.nn.parallel.DataParallel(vggnet, device_ids=DEVICE_IDS)
print(vggnet)
print('VGGNet created')

dataloader = data.DataLoader(dataset,
                             shuffle=True,
                             pin_memory=True,
                             drop_last=True,
                             num_workers=4,
                             batch_size=BATCH_SIZE)
print('Dataloader created')

# create optimizer
optimizer = optim.SGD(params=vggnet.parameters(),
                      lr=LR_INIT,
from model import VGGNet
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np

data = np.load("train_val_test.npz")
train_X, train_Y = data["arr_0"], data["arr_1"]
val_X, val_Y = data["arr_2"], data["arr_3"]
test_X, test_Y = data["arr_4"], data["arr_5"]

vgg = VGGNet()
model = vgg.build()

opt = Adam(learning_rate=1e-3)
early_stop = EarlyStopping(monitor='val_loss', patience=5)
model.compile(loss="categorical_crossentropy", optimizer=opt,
              metrics=["accuracy"])
history = model.fit(train_X, train_Y, epochs=30,
                    validation_data=(val_X, val_Y),
                    callbacks=[early_stop])
model.save("model.h5")
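# A sketch of reloading the saved model for evaluation on the held-out
# test split; `model.h5` is the file written above, but this usage is an
# assumption about how the project evaluates it:
from tensorflow.keras.models import load_model

reloaded = load_model("model.h5")
loss_value, acc = reloaded.evaluate(test_X, test_Y)
print("test accuracy: {:.4f}".format(acc))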
from PIL import Image
from PIL import ImageEnhance
import paddle.fluid as fluid
from multiprocessing import cpu_count
import matplotlib.pyplot as plt
from model import VGGNet
from data_processor import *

'''
Model evaluation
'''
with fluid.dygraph.guard():
    model, _ = fluid.load_dygraph("vgg")
    vgg = VGGNet()
    vgg.load_dict(model)
    vgg.eval()

    accs = []
    for batch_id, data in enumerate(eval_reader()):
        dy_x_data = np.array([x[0] for x in data]).astype('float32')
        y_data = np.array([x[1] for x in data]).astype('int')
        y_data = y_data[:, np.newaxis]

        img = fluid.dygraph.to_variable(dy_x_data)
        label = fluid.dygraph.to_variable(y_data)

        out, acc = vgg(img, label)
        lab = np.argsort(out.numpy())
        accs.append(acc.numpy()[0])
    print(np.mean(accs))
import tensorflow as tf
import sys
from model import VGGNet
from data_loader import DataLoader

net = VGGNet([224, 224], 128)
net.build()
loss = net.loss()
# print(tf.global_variables())

ckpt_path = '../ckpt/model.ckpt-0'
loader = DataLoader()
sess = tf.Session()

optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)

ls = tf.summary.scalar('loss', loss)
train_writer = tf.summary.FileWriter('../log_train', sess.graph)
valid_writer = tf.summary.FileWriter('../log_valid', sess.graph)

batch = 32
batch_num = loader.images_urls.shape[0] // batch
# config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.7
valid_batch_num = loader.valid_urls.shape[0] // batch

if ckpt_path:
    saver.restore(sess, ckpt_path)
else:
    sess.run(tf.global_variables_initializer())
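# The graph above is built but no training loop is shown; a minimal sketch
# of one epoch, assuming DataLoader exposes a `get_batch_data(batch)` method
# mirroring the `get_valid_batch_data` used in the validation script (the
# method name is an assumption):
for idx in range(batch_num):
    res = loader.get_batch_data(batch)
    feed_dicts = {net.inputs: res[0], net.ground_truth: res[1]}
    _, loss_value, summary = sess.run([optimizer, loss, ls],
                                      feed_dict=feed_dicts)
    train_writer.add_summary(summary, idx)
    if idx % 10 == 0:
        print('iter %d, loss %.4f' % (idx, loss_value))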
class Solver(object):
    DEFAULTS = {}

    def __init__(self, version, data_loader, config, output_txt):
        """
        Initializes a Solver object
        """
        # data loader
        self.__dict__.update(Solver.DEFAULTS, **config)
        self.version = version
        self.data_loader = data_loader
        self.output_txt = output_txt

        self.build_model()

        # start with a pre-trained model
        if self.pretrained_model:
            self.load_pretrained_model()

    def build_model(self):
        """
        Instantiates the model, loss criterion, and optimizer
        """
        # instantiate model
        self.model = VGGNet(self.config,
                            self.use_batch_norm,
                            self.input_channels,
                            self.class_count,
                            self.init_weights)

        # instantiate loss criterion
        self.criterion = nn.CrossEntropyLoss()

        # instantiate optimizer
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.lr,
                                   momentum=self.momentum,
                                   weight_decay=self.weight_decay)

        # print network
        self.print_network(self.model, 'VGGNet')

        # use gpu if enabled
        if torch.cuda.is_available() and self.use_gpu:
            self.model.cuda()
            self.criterion.cuda()

    def print_network(self, model, name):
        """
        Prints the structure of the network and the total number of parameters
        """
        num_params = 0
        for p in model.parameters():
            num_params += p.numel()
        write_print(self.output_txt, name)
        write_print(self.output_txt, str(model))
        write_print(self.output_txt,
                    'The number of parameters: {}'.format(num_params))

    def load_pretrained_model(self):
        """
        loads a pre-trained model from a .pth file
        """
        self.model.load_state_dict(
            torch.load(
                os.path.join(self.model_save_path,
                             '{}.pth'.format(self.pretrained_model))))
        write_print(self.output_txt,
                    'loaded trained model {}'.format(self.pretrained_model))

    def print_loss_log(self, start_time, iters_per_epoch, e, i, loss):
        """
        Prints the loss and elapsed time for each epoch
        """
        total_iter = self.num_epochs * iters_per_epoch
        cur_iter = e * iters_per_epoch + i

        elapsed = time.time() - start_time
        total_time = (total_iter - cur_iter) * elapsed / (cur_iter + 1)
        epoch_time = (iters_per_epoch - i) * elapsed / (cur_iter + 1)

        epoch_time = str(datetime.timedelta(seconds=epoch_time))
        total_time = str(datetime.timedelta(seconds=total_time))
        elapsed = str(datetime.timedelta(seconds=elapsed))

        log = "Elapsed {}/{} -- {}, Epoch [{}/{}], Iter [{}/{}], " \
              "loss: {:.4f}".format(elapsed, epoch_time, total_time,
                                    e + 1, self.num_epochs,
                                    i + 1, iters_per_epoch, loss)
        write_print(self.output_txt, log)

    def save_model(self, e):
        """
        Saves a model per e epoch
        """
        path = os.path.join(self.model_save_path,
                            '{}/{}.pth'.format(self.version, e + 1))
        torch.save(self.model.state_dict(), path)

    def model_step(self, images, labels):
        """
        A step for each iteration
        """
        # set model in training mode
        self.model.train()

        # empty the gradients of the model through the optimizer
        self.optimizer.zero_grad()

        # forward pass
        output = self.model(images)

        # compute loss
        loss = self.criterion(output, labels.squeeze())

        # compute gradients using back propagation
        loss.backward()

        # update parameters
        self.optimizer.step()

        # return loss
        return loss

    def train(self):
        """
        Training process
        """
        self.losses = []
        self.top_1_acc = []
        self.top_5_acc = []
        iters_per_epoch = len(self.data_loader)

        # start with a trained model if it exists
        if self.pretrained_model:
            start = int(self.pretrained_model.split('/')[-1])
        else:
            start = 0

        # start training
        start_time = time.time()
        for e in range(start, self.num_epochs):
            for i, (images, labels) in enumerate(tqdm(self.data_loader)):
                images = to_var(images, self.use_gpu)
                labels = to_var(torch.LongTensor(labels), self.use_gpu)
                loss = self.model_step(images, labels)

            # print out loss log
            if (e + 1) % self.loss_log_step == 0:
                self.print_loss_log(start_time, iters_per_epoch, e, i, loss)
                self.losses.append((e, loss))

            # save model
            if (e + 1) % self.model_save_step == 0:
                self.save_model(e)

            # evaluate on the train dataset
            # if (e + 1) % self.train_eval_step == 0:
            #     top_1_acc, top_5_acc = self.train_evaluate(e)
            #     self.top_1_acc.append((e, top_1_acc))
            #     self.top_5_acc.append((e, top_5_acc))

        # print losses
        write_print(self.output_txt, '\n--Losses--')
        for e, loss in self.losses:
            write_print(self.output_txt, str(e) + ' {:.4f}'.format(loss))

        # print top_1_acc
        write_print(self.output_txt, '\n--Top 1 accuracy--')
        for e, acc in self.top_1_acc:
            write_print(self.output_txt, str(e) + ' {:.4f}'.format(acc))

        # print top_5_acc
        write_print(self.output_txt, '\n--Top 5 accuracy--')
        for e, acc in self.top_5_acc:
            write_print(self.output_txt, str(e) + ' {:.4f}'.format(acc))

    def eval(self, data_loader):
        """
        Returns the count of top 1 and top 5 predictions
        """
        # set the model to eval mode
        self.model.eval()

        top_1_correct = 0
        top_5_correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in data_loader:
                images = to_var(images, self.use_gpu)
                labels = to_var(torch.LongTensor(labels), self.use_gpu)

                output = self.model(images)
                total += labels.size()[0]

                # top 1: get the max for each instance in the batch
                _, top_1_output = torch.max(output.data, dim=1)
                top_1_correct += torch.sum(
                    torch.eq(labels.squeeze(), top_1_output))

                # top 5
                _, top_5_output = torch.topk(output.data, k=5, dim=1)
                for i, label in enumerate(labels):
                    if label in top_5_output[i]:
                        top_5_correct += 1

        return top_1_correct.item(), top_5_correct, total

    def train_evaluate(self, e):
        """
        Evaluates the performance of the model using the train dataset
        """
        top_1_correct, top_5_correct, total = self.eval(self.data_loader)
        log = "Epoch [{}/{}]--top_1_acc: {:.4f}--top_5_acc: {:.4f}".format(
            e + 1, self.num_epochs,
            top_1_correct / total,
            top_5_correct / total)
        write_print(self.output_txt, log)
        return top_1_correct / total, top_5_correct / total

    def test(self):
        """
        Evaluates the performance of the model using the test dataset
        """
        top_1_correct, top_5_correct, total = self.eval(self.data_loader)
        log = "top_1_acc: {:.4f}--top_5_acc: {:.4f}".format(
            top_1_correct / total,
            top_5_correct / total)
        write_print(self.output_txt, log)
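# A hypothetical instantiation of Solver; the config keys mirror the
# attributes the class reads (self.lr, self.momentum, ...), but the values
# and the data loader are placeholders, not the project's real setup:
config = {
    'config': 'VGG16', 'use_batch_norm': True, 'input_channels': 3,
    'class_count': 10, 'init_weights': True,
    'lr': 0.01, 'momentum': 0.9, 'weight_decay': 5e-4,
    'use_gpu': True, 'pretrained_model': None, 'num_epochs': 50,
    'loss_log_step': 1, 'model_save_step': 10,
    'model_save_path': './models',
}
solver = Solver(version='v1', data_loader=train_loader, config=config,
                output_txt='train_log.txt')
solver.train()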
    size = np.array(content_image.size) * scale
    # Image.ANTIALIAS was an alias for LANCZOS and was removed in Pillow 10
    content_image = content_image.resize(size.astype(int), Image.LANCZOS)
    content_image = transform(content_image).unsqueeze(0).cuda()

    # Style image processing
    style_image = Image.open(style_image)
    style_image = style_image.resize(
        [content_image.size(2), content_image.size(3)], Image.LANCZOS)
    style_image = transform(style_image).unsqueeze(0).cuda()

    # Initialize result and optimizer
    result_image = Variable(content_image.clone(), requires_grad=True)
    optimizer = torch.optim.Adam([result_image], lr=0.003, betas=(0.5, 0.999))

    # Model
    vgg = VGGNet()
    vgg = vgg.cuda()

    # Train
    for step in range(epochs):
        target_features = vgg(result_image)
        content_features = vgg(Variable(content_image))
        style_features = vgg(Variable(style_image))

        style_loss = 0
        content_loss = 0
        for f1, f2, f3 in zip(target_features, content_features,
                              style_features):
            # Content loss
            content_loss += torch.mean((f1 - f2) ** 2)
def main():
    # initialize the result image with mean 127.5 and stddev 20
    result = initial_result((1, 466, 712, 3), 127.5, 20)

    # read the content and style images
    content_val = read_img(content_img_path)
    style_val = read_img(style_img_path)

    content = tf.placeholder(tf.float32, shape=[1, 466, 712, 3])
    style = tf.placeholder(tf.float32, shape=[1, 615, 500, 3])

    # load the pretrained weights; note that Python 3 needs encoding='latin1'
    data_dict = np.load(vgg_16_npy_pyth, encoding='latin1').item()

    # create one VGG object per image
    vgg_for_content = VGGNet(data_dict)
    vgg_for_style = VGGNet(data_dict)
    vgg_for_result = VGGNet(data_dict)

    # build the three networks
    vgg_for_content.build(content)
    vgg_for_style.build(style)
    vgg_for_result.build(result)

    # choose which layers to extract features from; the content layers and
    # the corresponding result layers must match, and likewise for the
    # style layers
    content_features = [
        # vgg_for_content.conv1_2,
        # vgg_for_content.conv2_2,
        # vgg_for_content.conv3_3,
        vgg_for_content.conv4_3,
        vgg_for_content.conv5_3,
    ]
    result_content_features = [
        # vgg_for_result.conv1_2,
        # vgg_for_result.conv2_2,
        # vgg_for_result.conv3_3,
        vgg_for_result.conv4_3,
        vgg_for_result.conv5_3,
    ]
    style_features = [
        vgg_for_style.conv2_2,
    ]
    result_style_features = [
        vgg_for_result.conv2_2,
    ]

    style_gram = [gram_matrix(feature) for feature in style_features]
    result_style_gram = [
        gram_matrix(feature) for feature in result_style_features
    ]

    # content loss
    content_loss = tf.zeros(1, tf.float32)
    for c, c_ in zip(content_features, result_content_features):
        content_loss += tf.reduce_mean((c - c_) ** 2, axis=[1, 2, 3])

    # style loss; computing the Gram matrix dropped one dimension, so
    # averaging over axes [1, 2] is enough
    style_loss = tf.zeros(1, tf.float32)
    for s, s_ in zip(style_gram, result_style_gram):
        style_loss += tf.reduce_mean((s - s_) ** 2, axis=[1, 2])

    # total loss
    loss = content_loss * lambda_c + style_loss * lambda_s
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(num_steps):
            loss_value, content_loss_value, style_loss_value, _ = sess.run(
                [loss, content_loss, style_loss, train_op],
                feed_dict={
                    content: content_val,
                    style: style_val
                })
            print('step: %d, loss_value: %.4f, content_loss: %.4f, style_loss: %.4f'
                  % (step + 1, loss_value[0],
                     content_loss_value[0], style_loss_value[0]))
            if step % 100 == 0:
                result_img_path = os.path.join(output_dir,
                                               'result_%05d.jpg' % (step + 1))
                # the result is 4-D, so index 0 removes the batch dimension;
                # Tensor.eval takes the session as a keyword argument
                result_val = result.eval(session=sess)[0]
                # clip pixel values to the valid [0, 255] range
                result_val = np.clip(result_val, 0, 255)
                img_arr = np.asarray(result_val, np.uint8)
                img = Image.fromarray(img_arr)
                img.save(result_img_path)
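# `gram_matrix` is referenced above but defined elsewhere; a sketch of the
# usual TF1 formulation consistent with the comment about the dropped
# dimension (normalization details may differ from the project's version):
def gram_matrix(x):
    """Compute channel-wise Gram matrices for features of shape (b, h, w, ch)."""
    b, h, w, ch = x.get_shape().as_list()
    features = tf.reshape(x, [b, h * w, ch])
    # (b, ch, ch): inner products between channel activations,
    # normalized by the number of entries per channel
    gram = tf.matmul(features, features, transpose_a=True) \
        / tf.constant(h * w * ch, tf.float32)
    return gram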