def main(args):
    writer = MlflowWriter(args.exp_name)
    writer = write_log_base(args, writer)
    logger = CustomMlFlowLogger(writer)
    pl.seed_everything(args.seed)

    model = mobilenet_v2(pretrained=True, num_classes=args.num_classes)
    datamodule = AnimeFaceDataModule(args)
    criterion = nn.CrossEntropyLoss()
    plmodel = ImageClassifier(args, model, criterion)
    trainer = pl.Trainer(
        logger=logger,
        checkpoint_callback=False,
        gpus=2,
        max_epochs=args.epochs,
        flush_logs_every_n_steps=args.print_freq,
        log_every_n_steps=args.log_freq,
        accelerator="dp",
        precision=16 if args.apex else 32,
        deterministic=True,
        num_sanity_val_steps=-1,
    )

    starttime = time.time()  # measure execution time (wall clock)
    trainer.fit(plmodel, datamodule=datamodule)
    trainer.test(plmodel, datamodule=datamodule, verbose=True)
    writer.move_mlruns()

    # print elapsed time
    endtime = time.time()
    interval = endtime - starttime
    print("elapsed time = {0:d}h {1:d}m {2:d}s".format(
        int(interval / 3600),
        int((interval % 3600) / 60),
        int((interval % 3600) % 60),
    ))
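# A minimal sketch of a command-line entry point for main(args) above. Every attribute
# accessed by main() (exp_name, seed, num_classes, epochs, print_freq, log_freq, apex)
# is parsed here; the flag names and defaults are illustrative assumptions, not part of
# the original script.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="MobileNetV2 training with PyTorch Lightning and MLflow logging")
    parser.add_argument("--exp_name", type=str, default="anime_face")
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--num_classes", type=int, required=True)
    parser.add_argument("--epochs", type=int, default=90)
    parser.add_argument("--print_freq", type=int, default=100)
    parser.add_argument("--log_freq", type=int, default=50)
    parser.add_argument("--apex", action="store_true", help="train with 16-bit precision")
    main(parser.parse_args())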
def dump_features(data_dir, features_dir):
    with open(os.path.join(data_dir, 'labels.txt'), 'r') as fp:
        labels = [line.strip() for line in fp.readlines()]
    label_to_index = {label: index for index, label in enumerate(labels)}

    model = mobilenet_v2()
    model.trainable = False

    for category in sorted(os.listdir(data_dir)):
        if not os.path.isdir(os.path.join(data_dir, category)):
            continue
        print(f'calculate features of {category} data...')
        features, label = [], []
        for root, dirs, files in os.walk(os.path.join(data_dir, category)):
            if not files:
                continue
            print(root)
            for filename in files:
                image = tf.io.read_file(os.path.join(root, filename))
                image = tf.io.decode_image(image, channels=3)
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                features.append(
                    model(tf.expand_dims(image, axis=0)).numpy().flatten())
                label.append(label_to_index[os.path.basename(root)])
        np.savez(os.path.join(features_dir, f'{category}.npz'),
                 inputs=features, targets=label)
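# A minimal sketch of the files dump_features() produces: each <category>.npz stores the
# flattened 1280-dim MobileNetV2 feature vectors under 'inputs' and the integer class
# indices under 'targets'. The directory and file name are illustrative assumptions.
import numpy as np

npz = np.load('features/training.npz')
print(npz['inputs'].shape)   # (num_images, 1280)
print(npz['targets'].shape)  # (num_images,)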
def train(data_dir, features_dir, weights_dir, batch_size=32):
    if len(glob.glob(os.path.join(features_dir, '*.npz'))) == 0:
        os.makedirs(features_dir, exist_ok=True)
        dump_features(data_dir, features_dir)

    def dataset(category):
        npz = np.load(os.path.join(features_dir, f'{category}.npz'))
        inputs = npz['inputs']
        targets = npz['targets']
        size = inputs.shape[0]
        return tf.data.Dataset.from_tensor_slices(
            (inputs, targets)).shuffle(size), size

    training_data, training_size = dataset('training')
    validation_data, validation_size = dataset('validation')

    with open(os.path.join(data_dir, 'labels.txt')) as fp:
        labels = [line.strip() for line in fp.readlines()]
    classes = len(labels)

    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer((1280, )),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(
            classes,
            activation='softmax',
            kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
    ])
    model.summary()
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    history = model.fit(training_data.repeat().batch(batch_size),
                        steps_per_epoch=training_size // batch_size,
                        epochs=100,
                        validation_data=validation_data.batch(batch_size),
                        validation_steps=validation_size // batch_size,
                        callbacks=[tf.keras.callbacks.TensorBoard()])
    print(history.history)
    model.save_weights(os.path.join(weights_dir, 'transfer_weights.h5'))

    classifier = tf.keras.Sequential([
        mobilenet_v2(),
        model,
    ])
    classifier.trainable = False
    classifier.save('transfer_classifier.h5')
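# A minimal sketch of reusing the trained head on precomputed features: rebuild the same
# Dense head as in train(), load the weights saved above, and classify one 1280-dim
# feature vector from a dumped .npz file. The paths ('data', 'features', 'weights') are
# illustrative assumptions about how train() was called.
import numpy as np
import tensorflow as tf

with open('data/labels.txt') as fp:
    num_classes = len([line for line in fp if line.strip()])

head = tf.keras.Sequential([
    tf.keras.layers.InputLayer((1280, )),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(num_classes, activation='softmax',
                          kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
])
head.load_weights('weights/transfer_weights.h5')

npz = np.load('features/validation.npz')
probs = head.predict(npz['inputs'][:1])  # shape (1, num_classes)
print(int(np.argmax(probs, axis=1)[0]))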
def main():
    logger, result_dir, _ = utils.config_backup_get_log(args, __file__)
    device = utils.get_device()
    utils.set_seed(args.seed, device)  # set random seed

    dataset = COVID19DataSet(root=args.datapath, ctonly=args.ctonly)  # load dataset
    trainset, testset = split_dataset(dataset=dataset, logger=logger)

    if args.model.lower() in ['mobilenet']:
        net = mobilenet_v2(task='classification', moco=False, ctonly=args.ctonly).to(device)
    elif args.model.lower() in ['densenet']:
        net = densenet121(task='classification', moco=False, ctonly=args.ctonly).to(device)
    else:
        raise Exception('Unknown model: {}'.format(args.model))

    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr, weight_decay=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.bstrain,
                                              shuffle=True, num_workers=args.nworkers)
    testloader = torch.utils.data.DataLoader(testset, batch_size=args.bstest,
                                             shuffle=False, num_workers=args.nworkers)

    best_auroc = 0.
    print('==> Start training ..')
    start = time.time()
    for epoch in range(args.maxepoch):
        net = train(epoch, net, trainloader, criterion, optimizer, scheduler, args.model, device)
        scheduler.step()
        if epoch % 5 == 0:
            auroc, aupr, f1_score, accuracy = validate(net, testloader, device)
            logger.write('Epoch:%3d | AUROC: %5.4f | AUPR: %5.4f | F1_Score: %5.4f | Accuracy: %5.4f\n'
                         % (epoch, auroc, aupr, f1_score, accuracy))
            if auroc > best_auroc:
                best_auroc = auroc
                best_aupr = aupr
                best_epoch = epoch
                print("save checkpoint...")
                torch.save(net.state_dict(), './%s/%s.pth' % (result_dir, args.model))

    # final evaluation after the last epoch
    auroc, aupr, f1_score, accuracy = validate(net, testloader, device)
    logger.write('Epoch:%3d | AUROC: %5.4f | AUPR: %5.4f | F1_Score: %5.4f | Accuracy: %5.4f\n'
                 % (epoch, auroc, aupr, f1_score, accuracy))

    if args.batchout:
        with open('temp_result.txt', 'w') as f:
            f.write("%10.8f\n" % (best_auroc))
            f.write("%10.8f\n" % (best_aupr))
            f.write("%d" % (best_epoch))

    end = time.time()
    hours, rem = divmod(end - start, 3600)
    minutes, seconds = divmod(rem, 60)
    print("Elapsed Time: {:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds))
    logger.write("Elapsed Time: {:0>2}:{:0>2}:{:05.2f}\n".format(int(hours), int(minutes), seconds))
    return True
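# A minimal sketch of reloading the checkpoint saved by main() for evaluation only.
# It reuses the helpers from the original script (utils, COVID19DataSet, split_dataset,
# mobilenet_v2, validate); the checkpoint path and the 'mobilenet' choice are
# illustrative assumptions.
def evaluate_checkpoint(ckpt_path, args, logger):
    device = utils.get_device()
    net = mobilenet_v2(task='classification', moco=False, ctonly=args.ctonly).to(device)
    net.load_state_dict(torch.load(ckpt_path, map_location=device))
    net.eval()

    dataset = COVID19DataSet(root=args.datapath, ctonly=args.ctonly)
    _, testset = split_dataset(dataset=dataset, logger=logger)
    testloader = torch.utils.data.DataLoader(testset, batch_size=args.bstest,
                                             shuffle=False, num_workers=args.nworkers)
    # returns (auroc, aupr, f1_score, accuracy), as in the training loop above
    return validate(net, testloader, device)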
def main():
    args.task_selection = args.task_selection.split(',')

    torch.manual_seed(args.seed)

    # LOAD DATASET
    stat_file = args.stat_file
    with open(stat_file, 'rb') as f:
        data = pickle.load(f)
    mean, std = data['mean'], data['std']
    mean = [float(m) for m in mean]
    std = [float(s) for s in std]
    normalize = transforms.Normalize(mean=mean, std=std)
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(90),
        transforms.ToTensor(),
        normalize,
    ])
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    if not args.shape_dataset:
        if args.task_selection is not None:
            classes = args.task_selection
        elif args.office_dataset:
            classes = ['style', 'genre']
        elif args.bam_dataset:
            classes = ['content', 'emotion', 'media']
        else:
            classes = ['artist_name', 'genre', 'style', 'technique', 'century']
        valset = Wikiart(path_to_info_file=args.val_file, path_to_images=args.im_path,
                         classes=classes, transform=val_transform)
        trainset = Wikiart(path_to_info_file=args.train_file, path_to_images=args.im_path,
                           classes=classes, transform=train_transform)
    else:
        if args.task_selection is not None:
            classes = args.task_selection
        else:
            classes = ['shape', 'n_shapes', 'color_shape', 'color_background']
        valset = ShapeDataset(root_dir='/export/home/kschwarz/Documents/Data/Geometric_Shapes',
                              split='val', classes=classes, transform=val_transform)
        trainset = ShapeDataset(root_dir='/export/home/kschwarz/Documents/Data/Geometric_Shapes',
                                split='train', classes=classes, transform=train_transform)

    if not trainset.labels_to_ints == valset.labels_to_ints:
        print('Validation and training set integer labels do not match. Using the label conversion of the training set.')
        print(trainset.labels_to_ints, valset.labels_to_ints)
        valset.labels_to_ints = trainset.labels_to_ints.copy()
    num_labels = [len(trainset.labels_to_ints[c]) for c in classes]

    # PARAMETERS
    use_cuda = args.use_gpu and torch.cuda.is_available()
    device_nb = args.device
    if use_cuda:
        torch.cuda.set_device(device_nb)
        torch.cuda.manual_seed_all(args.seed)

    # INITIALIZE NETWORK
    if args.model.lower() not in ['mobilenet_v2', 'vgg16_bn']:
        raise NotImplementedError('Unknown Model {}\n\t+ Choose from: [mobilenet_v2, vgg16_bn].'
                                  .format(args.model))
    elif args.model.lower() == 'mobilenet_v2':
        featurenet = mobilenet_v2(pretrained=True)
    elif args.model.lower() == 'vgg16_bn':
        featurenet = vgg16_bn(pretrained=True)
    if args.not_narrow:
        bodynet = featurenet
    else:
        bodynet = narrownet(featurenet, dim_feature_out=args.feature_dim)
    net = OctopusNet(bodynet, n_labels=num_labels)
    n_parameters = sum([p.data.nelement() for p in net.parameters() if p.requires_grad])
    if use_cuda:
        net = net.cuda()
    print('Using {}\n\t+ Number of params: {}'.format(str(bodynet).split('(')[0], n_parameters))

    # LOG/SAVE OPTIONS
    log_interval = args.log_interval
    log_dir = args.log_dir
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    # tensorboard summary writer
    timestamp = time.strftime('%m-%d-%H-%M')
    if args.shape_dataset:
        expname = timestamp + '_ShapeDataset_' + str(bodynet).split('(')[0]
    else:
        expname = timestamp + '_' + str(bodynet).split('(')[0]
    if args.exp_name is not None:
        expname = expname + '_' + args.exp_name
    log = TBPlotter(os.path.join(log_dir, 'tensorboard', expname))
    log.print_logdir()

    # allow the cuDNN auto-tuner to find the best algorithm for the hardware
    cudnn.benchmark = True

    write_config(args, os.path.join(log_dir, expname))

    # INITIALIZE TRAINING
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10,
                                                     threshold=1e-1, verbose=True)
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        criterion = criterion.cuda()

    kwargs = {'num_workers': 8, 'pin_memory': True} if use_cuda else {}
    trainloader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True, **kwargs)
    valloader = DataLoader(valset, batch_size=args.batch_size, shuffle=True, **kwargs)

    # optionally resume from a checkpoint
    start_epoch = 1
    if args.chkpt is not None:
        if os.path.isfile(args.chkpt):
            print("=> loading checkpoint '{}'".format(args.chkpt))
            checkpoint = torch.load(args.chkpt, map_location=lambda storage, loc: storage)
            start_epoch = checkpoint['epoch']
            best_acc_score = checkpoint['best_acc_score']
            best_acc = checkpoint['acc']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            scheduler.load_state_dict(checkpoint['scheduler'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.chkpt, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.chkpt))

    def train(epoch):
        losses = AverageMeter()
        accs = AverageMeter()
        class_acc = [AverageMeter() for i in range(len(classes))]

        # switch to train mode
        net.train()
        for batch_idx, (data, target) in enumerate(trainloader):
            if use_cuda:
                data, target = Variable(data.cuda()), [Variable(t.cuda()) for t in target]
            else:
                data, target = Variable(data), [Variable(t) for t in target]

            # compute output
            outputs = net(data)
            preds = [torch.max(outputs[i], 1)[1] for i in range(len(classes))]

            loss = Variable(torch.Tensor([0])).type_as(data[0])
            for i, o, t, p in zip(range(len(classes)), outputs, target, preds):
                # in case of None labels
                mask = t != -1
                if mask.sum() == 0:
                    continue
                o, t, p = o[mask], t[mask], p[mask]
                loss += criterion(o, t)
                # measure class accuracy and record loss
                class_acc[i].update((torch.sum(p == t).type(torch.FloatTensor) / t.size(0)).data)
            accs.update(torch.mean(torch.stack([class_acc[i].val for i in range(len(classes))])),
                        target[0].size(0))
            losses.update(loss.data, target[0].size(0))

            # compute gradient and do optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{}]\t'
                      'Loss: {:.4f} ({:.4f})\t'
                      'Acc: {:.2f}% ({:.2f}%)'.format(
                          epoch, batch_idx * len(target), len(trainloader.dataset),
                          float(losses.val), float(losses.avg),
                          float(accs.val) * 100., float(accs.avg) * 100.))
                print('\t' + '\n\t'.join(['{}: {:.2f}%'.format(classes[i], float(class_acc[i].val) * 100.)
                                          for i in range(len(classes))]))

        # log average values to tensorboard
        log.write('loss', float(losses.avg), epoch, test=False)
        log.write('acc', float(accs.avg), epoch, test=False)
        for i in range(len(classes)):
            log.write('class_acc', float(class_acc[i].avg), epoch, test=False)

    def test(epoch):
        losses = AverageMeter()
        accs = AverageMeter()
        class_acc = [AverageMeter() for i in range(len(classes))]

        # switch to evaluation mode
        net.eval()
        for batch_idx, (data, target) in enumerate(valloader):
            if use_cuda:
                data, target = Variable(data.cuda()), [Variable(t.cuda()) for t in target]
            else:
                data, target = Variable(data), [Variable(t) for t in target]

            # compute output
            outputs = net(data)
            preds = [torch.max(outputs[i], 1)[1] for i in range(len(classes))]

            loss = Variable(torch.Tensor([0])).type_as(data[0])
            for i, o, t, p in zip(range(len(classes)), outputs, target, preds):
                # in case of None labels
                mask = t != -1
                if mask.sum() == 0:
                    continue
                o, t, p = o[mask], t[mask], p[mask]
                loss += criterion(o, t)
                # measure class accuracy and record loss
                class_acc[i].update((torch.sum(p == t).type(torch.FloatTensor) / t.size(0)).data)
            accs.update(torch.mean(torch.stack([class_acc[i].val for i in range(len(classes))])),
                        target[0].size(0))
            losses.update(loss.data, target[0].size(0))

        # compute mean - std/mean as a measure for accuracy
        score = accs.avg - torch.std(torch.stack([class_acc[i].avg for i in range(len(classes))])) / accs.avg

        print('\nVal set: Average loss: {:.4f} Average acc {:.2f}% Acc score {:.2f} LR: {:.6f}'
              .format(float(losses.avg), float(accs.avg) * 100., float(score),
                      optimizer.param_groups[-1]['lr']))
        print('\t' + '\n\t'.join(['{}: {:.2f}%'.format(classes[i], float(class_acc[i].avg) * 100.)
                                  for i in range(len(classes))]))
        log.write('loss', float(losses.avg), epoch, test=True)
        log.write('acc', float(accs.avg), epoch, test=True)
        for i in range(len(classes)):
            log.write('class_acc', float(class_acc[i].avg), epoch, test=True)
        return losses.avg.cpu().numpy(), float(score), float(accs.avg), \
            [float(class_acc[i].avg) for i in range(len(classes))]

    if start_epoch == 1:
        # compute baseline
        _, best_acc_score, best_acc, _ = test(epoch=0)
    else:
        # best_acc_score and best_acc were restored from the checkpoint
        pass

    for epoch in range(start_epoch, args.epochs + 1):
        # train for one epoch
        train(epoch)
        # evaluate on validation set
        val_loss, val_acc_score, val_acc, val_class_accs = test(epoch)
        scheduler.step(val_loss)

        # remember best acc and save checkpoint
        is_best = val_acc_score > best_acc_score
        best_acc_score = max(val_acc_score, best_acc_score)
        save_checkpoint({
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'best_acc_score': best_acc_score,
            'acc': val_acc,
            'class_acc': {c: a for c, a in zip(classes, val_class_accs)}
        }, is_best, expname, directory=log_dir)

        if val_acc > best_acc:
            shutil.copyfile(os.path.join(log_dir, expname + '_checkpoint.pth.tar'),
                            os.path.join(log_dir, expname + '_model_best_mean_acc.pth.tar'))
        best_acc = max(val_acc, best_acc)

        if optimizer.param_groups[-1]['lr'] < 1e-5:
            print('Learning rate reached minimum threshold. End training.')
            break

    # report best values
    try:
        best = torch.load(os.path.join(log_dir, expname + '_model_best.pth.tar'),
                          map_location=lambda storage, loc: storage)
    except IOError:  # could be only one task
        best = torch.load(os.path.join(log_dir, expname + '_model_best_mean_acc.pth.tar'),
                          map_location=lambda storage, loc: storage)
    print('Finished training after epoch {}:\n\tbest acc score: {}\n\tacc: {}\n\tclass acc: {}'
          .format(best['epoch'], best['best_acc_score'], best['acc'], best['class_acc']))
    print('Best model mean accuracy: {}'.format(best_acc))

    try:
        shutil.copyfile(os.path.join(log_dir, expname + '_model_best.pth.tar'),
                        os.path.join('models', expname + '_model_best.pth.tar'))
    except IOError:  # could be only one task
        shutil.copyfile(os.path.join(log_dir, expname + '_model_best_mean_acc.pth.tar'),
                        os.path.join('models', expname + '_model_best.pth.tar'))
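# train() and test() above rely on an AverageMeter helper that is not defined in this
# snippet. Below is a minimal sketch of the usual implementation (as in the official
# PyTorch ImageNet example); the original project's version may differ in details.
class AverageMeter(object):
    """Tracks the most recent value and the running average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count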
def main():
    # LOAD DATASET
    stat_file = args.stat_file
    with open(stat_file, 'rb') as f:
        data = pickle.load(f)
    mean, std = data['mean'], data['std']
    mean = [float(m) for m in mean]
    std = [float(s) for s in std]
    normalize = transforms.Normalize(mean=mean, std=std)
    img_transform = transforms.Compose(
        [transforms.Resize((224, 224)), transforms.ToTensor(), normalize])

    if args.shape_dataset:
        classes = ['shape']
        dataset = ShapeDataset(
            root_dir='/export/home/kschwarz/Documents/Data/Geometric_Shapes',
            split=args.info_file, classes=classes, transform=img_transform)
    elif args.stl_dataset:
        dataset = STL(transform=img_transform,
                      test='test' in args.info_file.split('/')[-1])
    else:
        dataset = Wikiart(path_to_info_file=args.info_file,
                          path_to_images=args.im_path,
                          classes=['image_id'], transform=img_transform)

    # PARAMETERS
    use_cuda = args.use_gpu and torch.cuda.is_available()
    device_nb = args.device
    if use_cuda:
        torch.cuda.set_device(device_nb)

    # INITIALIZE NETWORK
    if args.model.lower() not in ['mobilenet_v2', 'vgg16_bn']:
        raise NotImplementedError(
            'Unknown Model {}\n\t+ Choose from: [mobilenet_v2, vgg16_bn].'.format(args.model))
    elif args.model.lower() == 'mobilenet_v2':
        featurenet = mobilenet_v2(pretrained=True)
    elif args.model.lower() == 'vgg16_bn':
        featurenet = vgg16_bn(pretrained=True)
    if args.not_narrow:
        net = featurenet
    else:
        net = narrownet(featurenet, dim_feature_out=args.feature_dim)
    if use_cuda:
        net = net.cuda()

    if args.weight_file is not None:
        remove_fc(net, inplace=True)
    else:
        make_featurenet(net, inplace=True)
    print('Extract features using {}.'.format(str(net)))

    if args.weight_file:
        pretrained_dict = load_weights(args.weight_file, net.state_dict(),
                                       prefix_file='bodynet.')
        net.load_state_dict(pretrained_dict)
    if use_cuda:
        net = net.cuda()

    kwargs = {'num_workers': 8} if use_cuda else {}
    loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                         shuffle=False, **kwargs)

    net.eval()
    features = []
    for i, data in enumerate(loader):
        if isinstance(data, tuple) or isinstance(data, list):  # loader returns data, label
            data = data[0]
        if (i + 1) % 10 == 0:
            print('{}/{}'.format(i + 1, len(loader)))
        input = Variable(data, requires_grad=False) if not use_cuda \
            else Variable(data.cuda(), requires_grad=False)
        output = net(input)
        features.append(output.data.cpu())

    features = torch.cat(features)
    features = features.numpy()
    image_names = dataset.df['image_id'].values.astype(str)

    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)
    expname = '' if args.exp_name is None else '_' + args.exp_name
    if args.shape_dataset:
        outfile = os.path.join(
            args.output_dir,
            'ShapeDataset_' + str(net).split('(')[0] + '_' +
            args.info_file.split('/')[-1].split('.')[0] + expname + '.hdf5')
    elif args.office_dataset:
        outfile = os.path.join(
            args.output_dir,
            'OfficeDataset_' + str(net).split('(')[0] + '_' +
            args.info_file.split('/')[-1].split('.')[0] + expname + '.hdf5')
    elif args.bam_dataset:
        outfile = os.path.join(
            args.output_dir,
            'BAMDataset_' + str(net).split('(')[0] + '_' +
            args.info_file.split('/')[-1].split('.')[0] + expname + '.hdf5')
    elif args.stl_dataset:
        outfile = os.path.join(
            args.output_dir,
            'STLDataset_' + str(net).split('(')[0] + '_' +
            args.info_file.split('/')[-1].split('.')[0] + expname + '.hdf5')
    else:
        outfile = os.path.join(
            args.output_dir,
            str(net).split('(')[0] + '_' +
            args.info_file.split('/')[-1].split('.')[0] + expname + '.hdf5')

    with h5py.File(outfile, 'w') as f:
        f.create_dataset('features', features.shape, dtype=features.dtype,
                         data=features)
        f.create_dataset('image_names', image_names.shape,
                         dtype=image_names.dtype, data=image_names)
    print('Saved features to {}'.format(outfile))
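# A minimal sketch of reading the extracted features back. The file name is an
# illustrative assumption; 'features' and 'image_names' are the datasets created above.
import h5py

with h5py.File('features/MobileNetV2_info_test.hdf5', 'r') as f:
    feats = f['features'][()]      # (num_images, feature_dim) float array
    names = f['image_names'][()]   # matching image identifiers
print(feats.shape, len(names))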