def loadnet(npoints=10, path_to_model=None):
    # Load the trained model.
    net = ResNet(num_maps=npoints)
    checkpoint = torch.load(path_to_model)
    # Strip the 'module.' prefix left behind by nn.DataParallel checkpoints.
    checkpoint = {k.replace('module.', ''): v for k, v in checkpoint.items()}
    net.load_state_dict(checkpoint, strict=False)
    return net.to('cuda')
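# A minimal usage sketch for loadnet; the checkpoint path and the 256x256
# RGB input shape are assumptions, not part of the original code.
net = loadnet(npoints=10, path_to_model='checkpoints/model.pth')
net.eval()
with torch.no_grad():
    out = net(torch.randn(1, 3, 256, 256, device='cuda'))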
def test_resnet(modelpath, batch_size):
    dataLoader = DataLoader()
    net = ResNet.build_resnet('resnet34')
    net.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    # Build the network with a dummy batch shape so the weights can be loaded.
    net.build((1, 64, 64, 3))
    net.load_weights(modelpath)
    test_images, test_labels = dataLoader.get_batch_test(batch_size)
    net.evaluate(test_images, test_labels, verbose=2)
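# Hypothetical invocation of test_resnet; the weights filename follows the
# '{epoch}_epoch_resnet_weight.h5' pattern used by train_resnet below, but
# is an assumption here.
test_resnet('5_epoch_resnet_weight.h5', batch_size=128)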
class TrainNet(pl.LightningModule):
    def __init__(self, cfg) -> None:
        super().__init__()
        self.cfg = cfg
        self.dataset = DatasetFactory(cfg)
        self.net = ResNet(cfg, **cfg.net)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, x):
        return self.net(x)

    def prepare_data(self):
        self.dataset.prepare_dataset()

    def train_dataloader(self):
        return self.dataset.train_loader()

    def val_dataloader(self):
        return self.dataset.val_loader()

    def configure_optimizers(self):
        optimizer = SGD(self.net.parameters(), **self.cfg.training.optimizer)
        scheduler = lr_scheduler.MultiStepLR(optimizer, **self.cfg.training.scheduler)
        return [optimizer], [scheduler]

    def training_step(self, batch, batch_idx):
        # Lightning puts the module in train mode before this hook runs,
        # so no explicit self.train() call is needed.
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        predicted_labels = logits.argmax(dim=1)
        accuracy = (predicted_labels == y).sum().item() / len(y)
        tensorboard_logs = {'train_loss': loss, 'train_acc': accuracy}
        return {'loss': loss, 'log': tensorboard_logs}

    def validation_step(self, batch, batch_nb):
        # Lightning puts the module in eval mode before this hook runs.
        x, y = batch
        y_hat = self(x)
        predicted_labels = y_hat.argmax(dim=1)
        accuracy = (predicted_labels == y).sum().float() / len(y)
        return {'val_loss': self.loss_fn(y_hat, y), 'val_acc': accuracy}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['val_acc'] for x in outputs]).mean()
        tensorboard_logs = {'val_loss': avg_loss, 'val_acc': avg_acc}
        return {'val_loss': avg_loss, 'val_acc': avg_acc, 'log': tensorboard_logs}
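# A minimal sketch of driving TrainNet with a Lightning Trainer. The cfg
# object and the single-GPU setting are assumptions; the dict-style 'log'
# returns above target the older (pre-1.0) Lightning API.
model = TrainNet(cfg)
trainer = pl.Trainer(max_epochs=cfg.training.epochs, gpus=1)
trainer.fit(model)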
def test(img):
    # Use CUDA when available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    im = Image.open(img)
    im = im.resize((32, 32))
    # im.save("result.jpg")
    # im is now a 32x32 PIL.Image.Image; convert it to a tensor.
    tensor_test = TF.to_tensor(im)
    path_model = os.path.join("model", "resnet.ckpt")
    # Instantiate the ResNet and load the trained weights.
    model = ResNet(ResidualBlock, layers, number_classes).to(device)
    model.load_state_dict(torch.load(path_model))
    # print(model)
    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # Add a batch dimension (replaces the original tensor->list->tensor
        # round trip) and move the input to the execution device.
        images = tensor_test.unsqueeze(0).to(device)
        outputs = model(images)  # run the image through the model
        outputs = TNF.softmax(outputs, dim=1)
        # print(outputs)
        _, predicted = torch.max(outputs.data, 1)
        # print(predicted.data)
        if predicted[0] == 0:
            return 0  # a face
        else:
            return 1  # not a face
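# Hypothetical call to test(); the image path is an assumption.
label = test('samples/face.jpg')
print('face' if label == 0 else 'not a face')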
def main():
    parser = argparse.ArgumentParser(description='AIO trainer')
    parser.add_argument('--lr', type=float, default=1e-1)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--epochs', type=int, default=5)
    parser.add_argument('--dataset_path', type=str, required=True)
    parser.add_argument('--model_file', type=str)
    args = parser.parse_args()

    # Seed all RNGs for reproducibility.
    torch.manual_seed(42)
    torch.cuda.manual_seed_all(42)
    np.random.seed(42)

    print('[Train dataset]')
    dataset_files = glob.glob(args.dataset_path + '/*.hdf5')
    for f in dataset_files:
        print('>> ' + f)
    hdfdatasets = [H5Dataset(f) for f in dataset_files]
    dataset = torch_data.ConcatDataset(hdfdatasets)
    data_loader = torch_data.DataLoader(dataset, batch_size=args.batch_size,
                                        num_workers=args.num_workers,
                                        shuffle=True, pin_memory=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    network = ResNet(9, 128, 5, 8**2).to(device)
    optimizer = optim.SGD(network.parameters(), lr=.9 if False else args.lr, momentum=.9,
                          weight_decay=1e-4, nesterov=True)

    # Resume from a checkpoint when one is given.
    init_epoch = 0
    if args.model_file:
        if os.path.exists(args.model_file):
            state = torch.load(args.model_file)
            init_epoch = state['epoch']
            network.load_state_dict(state['state_dict'])
            optimizer.load_state_dict(state['optimizer'])
            print('{} loaded'.format(args.model_file))
            del state

    for epoch in range(init_epoch + 1, args.epochs + 1):
        start_time = time.time()
        loss, pi_loss, v_loss = train(network, optimizer, data_loader, device)
        print('epoch {}/{} - loss: {:.4f}, pi: {:.4f}, v: {:.4f}'.format(
            epoch, args.epochs, loss, pi_loss, v_loss))
        end_time = time.time()
        print('{} elapsed'.format(end_time - start_time))

        # Save a checkpoint after every epoch.
        now = datetime.datetime.now()
        pth_file_name = 'pth/{}{:02d}{:02d}_{:02d}_ckpt_{}.pth'.format(
            now.year, now.month, now.day, now.hour, epoch)
        state = {
            'state_dict': network.state_dict(),
            'epoch': epoch,
            'optimizer': optimizer.state_dict(),
            'loss': (loss, pi_loss, v_loss),
        }
        torch.save(state, pth_file_name)
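# Standard entry point; the example command line is an assumption.
if __name__ == '__main__':
    # python3 train.py --dataset_path ./data --batch_size 32 --epochs 5
    main()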
parser.add_argument("--num_epochs", type=int, default=200) parser.add_argument('--train', action="store_true") parser.add_argument('--evaluate', action="store_true") args = parser.parse_args() tf.logging.set_verbosity(tf.logging.INFO) if __name__ == "__main__": estimator = tf.estimator.Estimator( model_fn=lambda features, labels, mode, params: Classifier(network=ResNet( conv_param=Param(filters=32, kernel_size=[3, 3], strides=[1, 1]), pool_param=None, residual_params=[ Param(filters=32, strides=[1, 1], blocks=3), Param(filters=64, strides=[2, 2], blocks=3), Param(filters=128, strides=[2, 2], blocks=3), ], num_classes=10))(features, labels, mode, Param(params)), model_dir=args.model_dir, config=tf.estimator.RunConfig(save_summary_steps=100, save_checkpoints_steps=1000), params=dict( weight_decay=2e-4, learning_rate=lambda global_step: tf.train.exponential_decay( learning_rate=0.1 * args.batch_size / 64, global_step=global_step, decay_steps=50000 * args.num_epochs / args.batch_size / 4, decay_rate=0.1), momentum=0.9,
# Build a reinitializable iterator shared by the train and eval pipelines.
iterator = tf.data.Iterator.from_structure(TrainDataset.output_types,
                                           TrainDataset.output_shapes)
next_batch = iterator.get_next()
training_init_op = iterator.make_initializer(TrainDataset)
validation_init_op = iterator.make_initializer(EvalDataset)
##################### get the input pipeline ############################

##################### setup the network ################################
x = tf.placeholder(tf.float32, shape=(None, 224, 224, 3))
y = tf.placeholder(tf.int32, shape=(None, NUM_CLASSES))
is_training = tf.placeholder('bool', [])
keep_prob = tf.placeholder(tf.float32)

depth = 50  # may be 50, 101, or 152
ResNetModel = ResNet.ResNetModel(is_training, depth, NUM_CLASSES)
fc_image = ResNetModel.inference(x)
net_output = ResNet.get_net_output(fc_image=fc_image, classNum=NUM_CLASSES,
                                   KEEP_PROB=keep_prob)
prediction = tf.argmax(net_output, 1)
groundtruth = tf.argmax(y, 1)

# Training op
with tf.name_scope("train"):
    loss = get_loss(net_output, y)
    train_layers = ["scale5", "fc"]
    train_op = ResNetModel.optimize(loss=loss, learning_rate=LEARNINT_RATE,
                                    train_layers=train_layers)

# Evaluation op
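# A hedged sketch of how this TF1 graph is typically driven, assuming
# next_batch yields (images, labels) pairs; the 0.5 keep probability is
# an assumption.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(training_init_op)
    batch_x, batch_y = sess.run(next_batch)
    sess.run(train_op, feed_dict={x: batch_x, y: batch_y,
                                  is_training: True, keep_prob: 0.5})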
    for i in range(len(all_fnames)):
        fp.write('{}\t{}\t{}\n'.format(all_fnames[i], all_gt[i], all_pred[i]))
    return accuracy, report, all_pred


def load_pretrained(model, pretrained):
    # Handle the case when the model is wrapped inside nn.DataParallel.
    try:
        state = model.module.state_dict()
    except AttributeError:
        state = model.state_dict()
    state.update(
        torch.load(pretrained, map_location=lambda storage, loc: storage))
    model.load_state_dict(state)
    return model


if __name__ == '__main__':
    model = ResNet(out_dim=4, encoder='resnet50', pretrained=True).cuda()
    pretrained_file = '/phoenix/S7/kz298/AppleDiseaseClassification/runs/resnet50_baseline_all/best_model.pth'
    model = load_pretrained(model, pretrained_file)
    val_loader = build_dataloader(batch_size=16, mode='test', num_workers=8)
    out_file = '/phoenix/S7/kz298/AppleDiseaseClassification/test_results.txt'
    validate_model(model, val_loader, out_file)
BASE_LR = 0.001  # for batch_size=1 on a single GPU
RESUME = True

dataset = Dataset()
shuffled_reader = fluid.io.shuffle(dataset.train_reader, 1000)
train_batch_reader = fluid.io.batch(shuffled_reader, BATCH_SIZE)
val_batch_reader = fluid.io.batch(dataset.val_reader, 1)
test_batch_reader = fluid.io.batch(dataset.test_reader, 1)

# Train model
DEVICE = "cuda"
gpu_place = fluid.CUDAPlace(0)
logger.info("before training")
with fluid.dygraph.guard(gpu_place):
    # cats = LeNet(num_classes=12)
    # cats = VGG16(num_classes=12)
    cats = ResNet(num_classes=dataset.get_class_number())
    if os.path.exists("result/cats_model.pdparams") and RESUME:
        cats.load_dict(fluid.load_dygraph("result/cats_model")[0])
        logger.info("loading model weight")
    cce = fluid.layers.cross_entropy
    acc = fluid.layers.accuracy

    # Set up piecewise learning-rate decay.
    step1 = int(EPOCH * EPOCH_SIZE / BATCH_SIZE / 1.5)
    step2 = int(EPOCH * EPOCH_SIZE / BATCH_SIZE / 1.1)
    boundaries = [step1, step2]
    lr = BASE_LR * BATCH_SIZE
    lr_steps = [lr, lr * 0.1, lr * 0.01]
    learning_rate = fluid.layers.piecewise_decay(boundaries, lr_steps)
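    # A hedged sketch of the optimizer that would consume this decay schedule
    # in Paddle 1.x dygraph; the momentum value is an assumption.
    optimizer = fluid.optimizer.Momentum(learning_rate=learning_rate,
                                         momentum=0.9,
                                         parameter_list=cats.parameters())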
    transform=transform_test)
train_items = DataLoader(dataset=train_data, batch_size=args.batch_size,
                         shuffle=True, num_workers=args.workers)
test_items = DataLoader(dataset=test_data, batch_size=args.batch_size,
                        shuffle=True, num_workers=args.workers)
iters = 0
test_iter = 0

torch.cuda.set_device(args.gpu[0])
model = nn.DataParallel(ResNet(depth=19), device_ids=args.gpu).cuda()
print(model)
opt = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                weight_decay=args.weight_decay)
criterion = nn.CrossEntropyLoss().cuda()

batch_ind = 0
for epoch in range(1, args.epoch + 1):
    adjust_learning_rate(opt, epoch)
    for index, samples in enumerate(train_items):
        # `async=True` is a syntax error on Python 3.7+; use non_blocking.
        inputs, targets = samples[0].cuda(), samples[1].cuda(non_blocking=True)
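        # A hedged sketch of the update that usually completes this loop body;
        # it is an assumption, not part of the original fragment.
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        opt.zero_grad()
        loss.backward()
        opt.step()
        iters += 1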
def train_resnet(batch_size, epoch):
    dataLoader = DataLoader()

    # Build callbacks: checkpoint the best weights once per epoch.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        '{epoch}_epoch_resnet_weight.h5',
        save_best_only=True,
        save_weights_only=True,
        verbose=1,
        save_freq='epoch')

    # Build model
    net = ResNet.build_resnet('resnet34')  # resnet18 or resnet34
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.0005, momentum=0.9,
                                        decay=1e-6, nesterov=False)
    # optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005, decay=1e-6)
    net.compile(optimizer=optimizer,
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])

    # num_iter = dataLoader.num_train // batch_size
    # for e in range(epoch):
    #     for i in range(num_iter):
    #         train_images, train_labels = dataLoader.get_batch_train(batch_size)
    #         net.fit(train_images, train_labels, shuffle=False,
    #                 batch_size=batch_size, validation_split=0.1,
    #                 callbacks=[checkpoint])
    #         net.save_weights(str(e + 1) + "epoch_iter" + str(i) + "_resnet_weight.h5")

    # Full argument reference:
    # https://tensorflow.google.cn/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator?hl=en
    data_generate = ImageDataGenerator(
        featurewise_center=False,             # set the input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by the dataset std, feature-wise
        samplewise_std_normalization=False,   # divide each input by its own std
        zca_epsilon=1e-6,                     # epsilon for ZCA whitening (default 1e-6)
        zca_whitening=False,                  # whether to apply ZCA whitening
        rotation_range=10,                    # random rotation range in degrees (int)
        width_shift_range=0.1,                # horizontal shift (float; values >1 are pixels)
        height_shift_range=0.1,               # vertical shift (float; values >1 are pixels)
        shear_range=0.,                       # shear intensity (float)
        zoom_range=0.1,                       # random zoom (float)
        channel_shift_range=0.,               # random channel shift range (float)
        fill_mode='nearest',                  # fill for points outside the boundary; also constant, reflect, wrap
        cval=0.,                              # fill value, used when fill_mode='constant'
        horizontal_flip=True,                 # random horizontal flips
        vertical_flip=False,                  # random vertical flips
        rescale=None,                         # rescaling factor; None or 0 disables it
        preprocessing_function=None,          # function applied to each input
        data_format='channels_last',          # image data format (default channels_last)
        validation_split=0.0)
    # Adapted from: https://www.jianshu.com/p/1576da1abd71

    train_images, train_labels = dataLoader.get_batch_train(60000)
    net.fit(
        data_generate.flow(
            train_images, train_labels,
            batch_size=batch_size,
            shuffle=True,
            # save_to_dir='resource/images'
        ),
        steps_per_epoch=len(train_images) // batch_size,
        epochs=epoch,
        callbacks=[checkpoint],
        shuffle=True)
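# Hypothetical invocation; the batch size and epoch count are assumptions.
train_resnet(batch_size=128, epoch=5)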
import torch
import os
import sys
from network import ResNet

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('usage: python3 {} pth'.format(sys.argv[0]))
        sys.exit()
    pth_file = sys.argv[1]
    net = ResNet(9, 128, 5, 8**2, True)
    if os.path.exists(pth_file):
        state_dict = torch.load(pth_file, map_location='cpu')['state_dict']
        net.load_state_dict(state_dict)
        print('{} loaded'.format(pth_file))
        del state_dict
    # Derive the output name by swapping the extension for .onnx.
    ext_pos = pth_file.rfind('.')
    onnx_file = pth_file[:ext_pos] + '.onnx' if ext_pos != -1 else pth_file + '.onnx'
    # Export in inference mode with a fixed-size dummy input.
    net.eval()
    dummy_input = torch.zeros(100, 9, 8, 8)
    torch.onnx.export(net, dummy_input, onnx_file, verbose=True)
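    # Optional sanity check with onnxruntime; the import and the assumption
    # that a (1, 9, 8, 8) float input is valid mirror the dummy input above.
    import numpy as np
    import onnxruntime as ort
    sess = ort.InferenceSession(onnx_file)
    input_name = sess.get_inputs()[0].name
    outputs = sess.run(None, {input_name: np.zeros((1, 9, 8, 8), dtype=np.float32)})
    print([o.shape for o in outputs])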
def train_model(output_dir, exp_id):
    log_dir = os.path.join(output_dir, exp_id)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    writer = SummaryWriter(log_dir)

    model = ResNet(out_dim=4, encoder='resnet50', pretrained=True).cuda()
    init_lr = 1e-4
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr)

    train_batch_size = 16
    train_loader = build_dataloader(batch_size=train_batch_size,
                                    mode='train_val', num_workers=8)
    val_loader = build_dataloader(batch_size=16, mode='test', num_workers=8)

    # weight = torch.Tensor([0.1438004567630554, 0.08892753894551772,
    #                        0.10387256375786466, 0.6633994405335623]).cuda()
    # loss_fn = nn.CrossEntropyLoss(weight=weight)
    loss_fn = nn.CrossEntropyLoss()

    total_epochs = 120
    # logging options
    model_save_interval_epoch = 10
    lr_decay_factor = 0.5
    lr_decay_interval_epoch = 15

    # start training
    total_cnt = len(train_loader.dataset)
    num_steps_per_epoch = total_cnt // train_batch_size
    # best accuracy
    best_accuracy = 0
    step = 0
    for epoch in range(1, total_epochs + 1):
        model.train()
        processed_cnt = 0
        for batch_idx, item in enumerate(train_loader):
            step += 1
            # zero out gradient
            optimizer.zero_grad()
            im = item['im'].cuda()
            label = item['label'].squeeze(1).cuda()
            res = model(im)
            loss = loss_fn(res, label)
            loss.backward()
            optimizer.step()

            loss = loss.item()
            writer.add_scalar('Train/loss', loss, step)
            batch_size = im.shape[0]
            processed_cnt += batch_size
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tlr: {:.8f}'.format(
                epoch, processed_cnt, total_cnt, 100. * processed_cnt / total_cnt,
                loss, optimizer.param_groups[0]['lr']))

            # validate model
            if step % (num_steps_per_epoch // 3) == 0:
                accuracy, report, all_pred = validate_model(writer, model,
                                                            val_loader, step)
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    print('best_accuracy updated to: {}'.format(best_accuracy))
                    torch.save(model.state_dict(),
                               os.path.join(log_dir, 'best_model.pth'))
                    with open(os.path.join(log_dir, 'best_metric.txt'), 'w') as fp:
                        fp.write(report)
                    with open(os.path.join(log_dir, 'best_pred.txt'), 'w') as fp:
                        for pred in all_pred:
                            fp.write('{}\n'.format(label2name_dict[pred]))

        # whether to save the current model
        if epoch % model_save_interval_epoch == 0 or epoch == total_epochs:
            torch.save(model.state_dict(),
                       os.path.join(log_dir, 'model_epoch_{}.pth'.format(epoch)))

        # whether to decay learning rate
        if lr_decay_factor is not None and lr_decay_interval_epoch is not None:
            if epoch % lr_decay_interval_epoch == 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] *= lr_decay_factor
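# Hypothetical invocation; the output directory and experiment id are
# assumptions.
if __name__ == '__main__':
    train_model(output_dir='./runs', exp_id='resnet50_baseline')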
# path_data_N = os.path.join("database", "N")
# # Test
# train_loader = train_loader(os.path.join(path_data_P, "0.txt"),
#                             os.path.join(path_data_N, "0.txt"), batch_size=batchsize)
# print(len(train_loader))
# print(len(train_loader[0]))
# print(len(train_loader[0][0]))
# print(train_loader[0][1])
# print(len(train_loader[0][0][0]))
# print(len(train_loader[0][0][0][0]))

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ResNet(ResidualBlock, layers, number_classes).to(device)
print("Model has been defined.")

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
# # Test
# path_data_P = os.path.join("database", "P")
# path_data_N = os.path.join("database", "N")
# train_loader = train_loader(os.path.join(path_data_P, "0.txt"),
#                             os.path.join(path_data_N, "0.txt"), batch_size=batchsize)
# total_step = len(train_loader)
# curr_lr = learning_rate
# for epoch in range(num_epochs):
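# A hedged sketch of the epoch loop that the commented-out code above points
# toward; train_loader, num_epochs, and the batch format are assumptions.
for epoch in range(num_epochs):
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()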