def main():
    """Train and validate FaceNet, rebuilding the triplet loaders each epoch."""
    model = FaceNetModel(embedding_size=args.embedding_size,
                         num_classes=args.num_classes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

    # Resume weights from the checkpoint written for the previous epoch.
    if args.start_epoch != 0:
        checkpoint = torch.load(
            './log/checkpoint_epoch{}.pth'.format(args.start_epoch - 1))
        model.load_state_dict(checkpoint['state_dict'])

    final_epoch = args.num_epochs + args.start_epoch
    for epoch in range(args.start_epoch, final_epoch):
        print(80 * '=')
        print('Epoch [{}/{}]'.format(epoch, final_epoch - 1))

        # A fresh set of random triplets is sampled for every epoch.
        data_loaders, data_size = get_dataloader(
            args.train_root_dir, args.valid_root_dir, args.train_csv_name,
            args.valid_csv_name, args.num_train_triplets,
            args.num_valid_triplets, args.batch_size, args.num_workers)
        print("load:", data_size)
        for phase in ['train', 'valid']:
            print(phase, len(data_loaders[phase]))

        train_valid(model, optimizer, scheduler, epoch, data_loaders,
                    data_size)
    print(80 * '=')
def main():
    """Train/validate FaceNet with loaders built once and reused.

    Every 10th epoch both datasets advance to their next data subset.
    """
    model = FaceNetModel(embedding_size=args.embedding_size,
                         num_classes=args.num_classes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.2)

    if args.start_epoch != 0:
        # Pick up the weights saved at the end of the previous run.
        checkpoint = torch.load(
            './log/checkpoint_epoch{}.pth'.format(args.start_epoch - 1))
        model.load_state_dict(checkpoint['state_dict'])

    # Unlike the per-epoch variant, the dataloaders are created only once.
    data_loaders, data_size = get_dataloader(
        args.train_root_dir, args.valid_root_dir, args.train_csv_name,
        args.valid_csv_name, args.num_train_triplets, args.num_valid_triplets,
        args.batch_size, args.num_workers)

    final_epoch = args.num_epochs + args.start_epoch
    for epoch in range(args.start_epoch, final_epoch):
        print(80 * '=')
        print(datetime.datetime.now().time())
        print('Epoch [{}/{}]'.format(epoch, final_epoch - 1))
        if (epoch + 1) % 10 == 0:
            # Rotate both datasets to their next subset every 10 epochs.
            data_loaders['train'].dataset.advance_to_the_next_subset()
            data_loaders['valid'].dataset.advance_to_the_next_subset()
        train_valid(model, optimizer, scheduler, epoch, data_loaders,
                    data_size)
    print(80 * '=')
def create_embedding(model_path, face_images):
    """Generate a face-embedding database to register faces with the trained model.

    Args:
        model_path: path to a checkpoint containing a 'state_dict' entry.
        face_images: array-like of images, indexed as face_images[i, :, :, :]
            (assumed HWC uint8 per image for ToPILImage — TODO confirm).

    Returns:
        An (n, embedding_size) numpy array of embeddings.
    """
    parser = argparse.ArgumentParser(
        description='Face Recognition using Triplet Loss')
    parser.add_argument('--start-epoch', default=1, type=int, metavar='SE',
                        help='start epoch (default: 0)')
    parser.add_argument('--num-epochs', default=40, type=int, metavar='NE',
                        help='number of epochs to train (default: 200)')
    parser.add_argument('--num-classes', default=4000, type=int, metavar='NC',
                        help='number of clases (default: 10000)')
    parser.add_argument('--num-train-triplets', default=5000, type=int,
                        metavar='NTT',
                        help='number of triplets for training (default: 10000)')
    parser.add_argument('--num-valid-triplets', default=5000, type=int,
                        metavar='NVT',
                        help='number of triplets for vaidation (default: 10000)')
    parser.add_argument('--embedding-size', default=128, type=int, metavar='ES',
                        help='embedding size (default: 128)')
    parser.add_argument('--batch-size', default=50, type=int, metavar='BS',
                        help='batch size (default: 128)')
    parser.add_argument('--num-workers', default=0, type=int, metavar='NW',
                        help='number of workers (default: 0)')
    parser.add_argument('--learning-rate', default=0.001, type=float,
                        metavar='LR', help='learning rate (default: 0.001)')
    parser.add_argument('--margin', default=0.5, type=float, metavar='MG',
                        help='margin (default: 0.5)')
    parser.add_argument('--train-root-dir', default='TrainingData', type=str,
                        help='path to train root dir')
    parser.add_argument('--valid-root-dir', default='ValidatingData', type=str,
                        help='path to valid root dir')
    parser.add_argument('--train-csv-name', default='TrainingData.csv',
                        type=str, help='list of training images')
    parser.add_argument('--valid-csv-name', default='ValidatingData.csv',
                        type=str, help='list of validtion images')
    args = parser.parse_args()

    # BUG FIX: both branches of the conditional previously selected 'cpu',
    # so the GPU was never used even when available.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = FaceNetModel(embedding_size=args.embedding_size,
                         num_classes=args.num_classes).to(device)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    data_transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    images = []
    for i in range(face_images.shape[0]):
        image = data_transforms(face_images[i, :, :, :])
        images.append(image)
    images = torch.stack(images).to(device)

    with torch.no_grad():
        # BUG FIX companion: move to CPU before .numpy(); the previous
        # .detach().numpy() would raise on CUDA tensors.
        emb = model(images).cpu().numpy()
    return emb
def __init__(self, weights):
    """Build the recognizer: model on the selected device, eval mode.

    Args:
        weights: checkpoint path with a 'state_dict' entry, or None to
            keep the freshly constructed model weights.
    """
    super(Recognition, self).__init__()
    torch.backends.cudnn.benchmark = True

    self.weights = weights
    self.model = FaceNetModel(embedding_size=128, num_classes=10000)
    self.device, device_ids = self._prepare_device([0])
    # self.device = torch.device('cpu')
    self.model = self.model.to(self.device)
    if len(device_ids) > 1:
        # Spread the forward pass over the configured GPUs.
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=device_ids)

    self.transforms = get_transforms(phase='valid', width=224, height=224)

    if self.weights is not None:
        print('Load Checkpoint')
        # map_location keeps CPU-only machines compatible with GPU checkpoints.
        state = torch.load(weights,
                           map_location=lambda storage, loc: storage)
        self.model.load_state_dict(state['state_dict'])
    self.model.eval()

    # init
    # Pre-allocated input buffer, kept on the model's device.
    self.image = torch.FloatTensor(1, 3, 224, 224)
    self.image = self.image.to(self.device)
def main():
    """Train/validate FaceNet; optionally validate across six Omniglot alphabets."""
    print("Start date and time: ", time.asctime(time.localtime(time.time())))
    print("arguments: ", args)
    #load model
    model = FaceNetModel(embedding_size=args.embedding_size,
                         num_classes=args.num_classes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    # NOTE(review): step_size is derived from the learning rate
    # (0.001 / 4 = 0.00025), which makes the LR decay essentially every
    # scheduler step.  This looks like it was meant to be an epoch count
    # (e.g. a --step-size argument) — confirm intent before relying on it.
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.learning_rate / 4,
                                    gamma=0.1)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    # Resume from the checkpoint of the epoch before start_epoch.
    if args.start_epoch != 0:
        checkpoint = torch.load('{}/checkpoint_epoch{}.pth'.format(
            log_dir, args.start_epoch - 1))
        model.load_state_dict(checkpoint['state_dict'])
    #if args.torchvision_dataset:
    #create csv
    #    dataset_path = os.path.join(os.path.expanduser(datasets_path), args.dataset_name)
    #    dataloaders, dataset_sizes, num_classes = load_torchvision_data(args.dataset_name, dataset_path, data_transforms, dataset_depth)

    # Hard-coded validation alphabets (dir + csv pairs) for the
    # "pure_valid_six_time" mode below.
    array = [{
        "dir": "/lustre2/0/wsdarts/datasets/omniglot_multiple_folders_split/val/Japanese_(hiragana)/",
        "csv": "/nfs/home4/mhouben/facenet_pytorch/datasets/omniglot_alphabet_csvs/val/Japanese_(hiragana).csv"
    }, {
        "dir": "/lustre2/0/wsdarts/datasets/omniglot_multiple_folders_split/val/Japanese_(katakana)/",
        "csv": "/nfs/home4/mhouben/facenet_pytorch/datasets/omniglot_alphabet_csvs/val/Japanese_(katakana).csv"
    }, {
        "dir": "/lustre2/0/wsdarts/datasets/omniglot_multiple_folders_split/val/Korean/",
        "csv": "/nfs/home4/mhouben/facenet_pytorch/datasets/omniglot_alphabet_csvs/val/Korean.csv"
    }, {
        "dir": "/lustre2/0/wsdarts/datasets/omniglot_multiple_folders_split/val/Latin/",
        "csv": "/nfs/home4/mhouben/facenet_pytorch/datasets/omniglot_alphabet_csvs/val/Latin.csv"
    }, {
        "dir": "/lustre2/0/wsdarts/datasets/omniglot_multiple_folders_split/val/N_Ko/",
        "csv": "/nfs/home4/mhouben/facenet_pytorch/datasets/omniglot_alphabet_csvs/val/N_Ko.csv"
    }, {
        "dir": "/lustre2/0/wsdarts/datasets/omniglot_multiple_folders_split/val/Greek/",
        "csv": "/nfs/home4/mhouben/facenet_pytorch/datasets/omniglot_alphabet_csvs/val/Greek.csv"
    }]

    if args.pure_valid_six_time:
        # Run the full epoch loop once per validation alphabet.
        for a in array:
            for epoch in range(args.start_epoch,
                               num_epochs + args.start_epoch):
                t = time.time()
                if not args.pure_validation:
                    print(80 * '=')
                    print('Epoch [{}/{}]'.format(
                        epoch, num_epochs + args.start_epoch - 1))
                # load data (every epoch)
                print("a[dir]", a["dir"])
                print("a[csv]", a["csv"])
                data_loaders, data_size = get_dataloader(
                    args.train_root_dir, a["dir"], args.train_csv_name,
                    a["csv"], args.num_train_triplets,
                    args.num_valid_triplets, args.batch_size,
                    args.num_workers, args.train_format, args.valid_format,
                    args.train_dataset_depth, args.val_dataset_depth,
                    args.train_torchvision, args.val_torchvision, 224, 224,
                    args.pure_validation, args.pure_training)
                # training and validation
                train_valid(model, optimizer, scheduler, epoch, data_loaders,
                            data_size, t)
                print("duration of epoch ", epoch, ": ",
                      time.time() - t, " seconds")
                print(80 * '=')
            print("End date and time: ",
                  time.asctime(time.localtime(time.time())))
    else:
        for epoch in range(args.start_epoch, num_epochs + args.start_epoch):
            t = time.time()
            if not args.pure_validation:
                print(80 * '=')
                print('Epoch [{}/{}]'.format(
                    epoch, num_epochs + args.start_epoch - 1))
            # load data (every epoch)
            data_loaders, data_size = get_dataloader(
                args.train_root_dir, args.valid_root_dir,
                args.train_csv_name, args.valid_csv_name,
                args.num_train_triplets, args.num_valid_triplets,
                args.batch_size, args.num_workers, args.train_format,
                args.valid_format, args.train_dataset_depth,
                args.val_dataset_depth, args.train_torchvision,
                args.val_torchvision, 224, 224, args.pure_validation,
                args.pure_training)
            # training and validation
            train_valid(model, optimizer, scheduler, epoch, data_loaders,
                        data_size, t)
            print("duration of epoch ", epoch, ": ",
                  time.time() - t, " seconds")
            print(80 * '=')
        print("End date and time: ",
              time.asctime(time.localtime(time.time())))
def train():
    """Run the full triplet-loss training loop (train + valid phase per epoch).

    Returns:
        (train_loss, train_accuracy): per-epoch numpy arrays; loss is filled
        during the 'train' phase and accuracy during the 'valid' phase.
    """
    model = FaceNetModel(embedding_size=args.embedding_size,
                         num_classes=args.num_classes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

    # Resume from the previous epoch's checkpoint when requested.
    if args.start_epoch != 0:
        checkpoint = torch.load(
            './log/checkpoint_epoch{}.pth'.format(args.start_epoch - 1))
        model.load_state_dict(checkpoint['state_dict'])

    # NOTE(review): these arrays are sized num_epochs but indexed below with
    # the absolute epoch number; with start_epoch != 0 that indexing runs out
    # of bounds — confirm start_epoch is always 0 on this path.
    train_loss = np.zeros((args.num_epochs))
    train_accuracy = np.zeros((args.num_epochs))

    for epoch in range(args.start_epoch, args.num_epochs + args.start_epoch):
        print(80 * '-')
        print('Epoch [{}/{}]'.format(
            epoch, args.num_epochs + args.start_epoch - 1))

        # Train uses random horizontal flips; valid does not.
        data_transforms = {
            'train':
            transforms.Compose([
                transforms.ToPILImage(),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                     std=[0.5, 0.5, 0.5])
            ]),
            'valid':
            transforms.Compose([
                transforms.ToPILImage(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                     std=[0.5, 0.5, 0.5])
            ])
        }

        # Fresh random triplets are sampled every epoch.
        face_dataset = {
            'train':
            TripletFaceDataset(root_dir=args.train_root_dir,
                               csv_name=args.train_csv_name,
                               num_triplets=args.num_train_triplets,
                               transform=data_transforms['train']),
            'valid':
            TripletFaceDataset(root_dir=args.valid_root_dir,
                               csv_name=args.valid_csv_name,
                               num_triplets=args.num_valid_triplets,
                               transform=data_transforms['valid'])
        }

        dataloaders = {
            x: torch.utils.data.DataLoader(face_dataset[x],
                                           batch_size=args.batch_size,
                                           shuffle=False,
                                           num_workers=args.num_workers)
            for x in ['train', 'valid']
        }
        data_size = {x: len(face_dataset[x]) for x in ['train', 'valid']}

        for phase in ['train', 'valid']:
            labels, distances = [], []
            triplet_loss_sum = 0.0

            if phase == 'train':
                # NOTE(review): scheduler.step() before optimizer.step() is
                # the pre-1.1 PyTorch ordering; newer versions warn about it.
                scheduler.step()
                model.train()
            else:
                model.eval()

            for batch_idx, batch_sample in enumerate(dataloaders[phase]):
                anc_img = batch_sample['anc_img'].to(device)
                pos_img = batch_sample['pos_img'].to(device)
                neg_img = batch_sample['neg_img'].to(device)
                # print(anc_img.shape)
                pos_cls = batch_sample['pos_class'].to(device)
                neg_cls = batch_sample['neg_class'].to(device)

                # Gradients are only tracked during the train phase.
                with torch.set_grad_enabled(phase == 'train'):
                    # anc_embed, pos_embed and neg_embed are embedding of image
                    anc_embed, pos_embed, neg_embed = model(
                        anc_img), model(pos_img), model(neg_img)
                    # print(anc_embed.shape)

                    # choose the hard negatives only for "training"
                    pos_dist = l2_dist.forward(anc_embed, pos_embed)
                    neg_dist = l2_dist.forward(anc_embed, neg_embed)
                    # Mask of margin-violating triplets.  NOTE(review): the
                    # name shadows the builtin `all` for the rest of the loop.
                    all = (neg_dist - pos_dist <
                           args.margin).cpu().numpy().flatten()
                    if phase == 'train':
                        hard_triplets = np.where(all == 1)
                        # Skip the batch entirely when no triplet is hard.
                        if len(hard_triplets[0]) == 0:
                            continue
                    else:
                        # Validation keeps every triplet (mask is always >= 0).
                        hard_triplets = np.where(all >= 0)

                    anc_hard_embed = anc_embed[hard_triplets].to(device)
                    pos_hard_embed = pos_embed[hard_triplets].to(device)
                    neg_hard_embed = neg_embed[hard_triplets].to(device)

                    anc_hard_img = anc_img[hard_triplets].to(device)
                    pos_hard_img = pos_img[hard_triplets].to(device)
                    neg_hard_img = neg_img[hard_triplets].to(device)

                    pos_hard_cls = pos_cls[hard_triplets].to(device)
                    neg_hard_cls = neg_cls[hard_triplets].to(device)

                    # NOTE(review): these classifier predictions are never
                    # used in the loss below — presumably a leftover from an
                    # auxiliary softmax loss; confirm before removing.
                    anc_img_pred = model.forward_classifier(
                        anc_hard_img).to(device)
                    pos_img_pred = model.forward_classifier(
                        pos_hard_img).to(device)
                    neg_img_pred = model.forward_classifier(
                        neg_hard_img).to(device)

                    triplet_loss = TripletLoss(args.margin).forward(
                        anc_hard_embed, pos_hard_embed,
                        neg_hard_embed).to(device)

                    if phase == 'train':
                        optimizer.zero_grad()
                        triplet_loss.backward()
                        optimizer.step()

                    # Collect pos (label 1) and neg (label 0) distances over
                    # the FULL batch for the ROC-style evaluation below.
                    dists = l2_dist.forward(anc_embed, pos_embed)
                    distances.append(dists.data.cpu().numpy())
                    labels.append(np.ones(dists.size(0)))

                    dists = l2_dist.forward(anc_embed, neg_embed)
                    distances.append(dists.data.cpu().numpy())
                    labels.append(np.zeros(dists.size(0)))

                    triplet_loss_sum += triplet_loss.item()
                torch.cuda.empty_cache()

            # NOTE(review): averaged over the dataset size, not over the
            # number of contributing (hard) triplets — confirm this is intended.
            avg_triplet_loss = triplet_loss_sum / data_size[phase]
            labels = np.array(
                [sublabel for label in labels for sublabel in label])
            distances = np.array(
                [subdist for dist in distances for subdist in dist])

            tpr, fpr, accuracy, val, val_std, far = evaluate(
                distances, labels)
            print(' {} set - Triplet Loss = {:.8f}'.format(
                phase, avg_triplet_loss))
            print(' {} set - Accuracy = {:.8f}'.format(
                phase, np.mean(accuracy)))

            # Append a tab-separated log line per epoch/phase.
            with open('./log/{}_log.txt'.format(phase), 'a') as f:
                f.write(
                    str(epoch) + '\t' + str(np.mean(accuracy)) + '\t' +
                    str(avg_triplet_loss))
                f.write("\n")

            if phase == 'train':
                # Checkpoint once per epoch after the train phase.
                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict': model.state_dict()
                    }, 'log/checkpoint_epoch{}.pth'.format(epoch))
                train_loss[epoch] = avg_triplet_loss
            if phase == 'valid':
                train_accuracy[epoch] = np.mean(accuracy)

        print(80 * '-')

    # Save the entire model object (pickle), not just the state dict.
    torch.save(model, 'model.pkl')
    return train_loss, train_accuracy
def create_embedding(model_path, emb_face_dir, out_emb_path, out_filename):
    """Generate the face-embedding database used to register faces.

    The embeddings (an n x embedding_size numpy array) are saved to
    ``out_emb_path`` and the matching names list to ``out_filename``.

    Args:
        model_path: path to a checkpoint containing a 'state_dict' entry.
        emb_face_dir: directory of face images (``.jpg``) to embed.
        out_emb_path: destination for the numpy embedding array.
        out_filename: destination file for the names list.
    """
    parser = argparse.ArgumentParser(
        description='Face Recognition using Triplet Loss')
    parser.add_argument('--start-epoch', default=1, type=int, metavar='SE',
                        help='start epoch (default: 0)')
    parser.add_argument('--num-epochs', default=40, type=int, metavar='NE',
                        help='number of epochs to train (default: 200)')
    parser.add_argument('--num-classes', default=4000, type=int, metavar='NC',
                        help='number of clases (default: 10000)')
    parser.add_argument(
        '--num-train-triplets', default=5000, type=int, metavar='NTT',
        help='number of triplets for training (default: 10000)')
    parser.add_argument(
        '--num-valid-triplets', default=5000, type=int, metavar='NVT',
        help='number of triplets for vaidation (default: 10000)')
    parser.add_argument('--embedding-size', default=128, type=int, metavar='ES',
                        help='embedding size (default: 128)')
    parser.add_argument('--batch-size', default=50, type=int, metavar='BS',
                        help='batch size (default: 128)')
    parser.add_argument('--num-workers', default=0, type=int, metavar='NW',
                        help='number of workers (default: 0)')
    parser.add_argument('--learning-rate', default=0.001, type=float,
                        metavar='LR', help='learning rate (default: 0.001)')
    parser.add_argument('--margin', default=0.5, type=float, metavar='MG',
                        help='margin (default: 0.5)')
    parser.add_argument('--train-root-dir', default='TrainingData', type=str,
                        help='path to train root dir')
    parser.add_argument('--valid-root-dir', default='ValidatingData', type=str,
                        help='path to valid root dir')
    parser.add_argument('--train-csv-name', default='TrainingData.csv',
                        type=str, help='list of training images')
    parser.add_argument('--valid-csv-name', default='ValidatingData.csv',
                        type=str, help='list of validtion images')
    args = parser.parse_args()

    # BUG FIX: both branches of the conditional previously selected 'cpu',
    # so the GPU was never used even when available.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = FaceNetModel(embedding_size=args.embedding_size,
                         num_classes=args.num_classes).to(device)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    # BUG FIX: emb_face_dir was unconditionally overwritten with the
    # hard-coded path 'dataset/FaceImgData', silently ignoring the caller's
    # argument; the parameter is now honored.
    image_list, names_list = file_processing.gen_files_labels(emb_face_dir,
                                                              postfix='jpg')

    data_transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    images = []
    for image_path in image_list:
        image_path_os = os.path.join(str(image_path))
        image = io.imread(image_path_os)
        image = data_transforms(image)
        images.append(image)
    images = torch.stack(images).to(device)

    with torch.no_grad():
        # BUG FIX companion: move to CPU before .numpy(); the previous
        # .detach().numpy() would raise on CUDA tensors.
        compare_emb = model(images).cpu().numpy()

    print("Finish resigter!")
    np.save(out_emb_path, compare_emb)
    # NOTE(review): the keyword here is `model='w'` — presumably intended to
    # be mode='w'; left unchanged, verify file_processing.write_data's
    # signature before touching it.
    file_processing.write_data(out_filename, names_list, model='w')
def main():
    """Transfer-learning training entry point with layer freeze/unfreeze control."""
    init_log_just_created("log/valid.csv")
    init_log_just_created("log/train.csv")
    import pandas as pd
    valid = pd.read_csv('log/valid.csv')
    max_acc = valid['acc'].max()

    pretrain = args.pretrain
    fc_only = args.fc_only
    except_fc = args.except_fc
    train_all = args.train_all
    # BUG FIX: ''.split(',') yields [''], which is truthy, so an empty
    # --unfreeze/--freeze flag still triggered unfreeze_only([''])/
    # freeze_only(['']) below and could defeat the fc_only/except_fc policy.
    # Filtering empty entries makes the length checks below meaningful.
    unfreeze = [layer for layer in args.unfreeze.split(',') if layer]
    freeze = [layer for layer in args.freeze.split(',') if layer]
    start_epoch = 0
    print(f"Transfer learning: {pretrain}")
    print("Train fc only:", fc_only)
    print("Train except fc:", except_fc)
    print("Train all layers:", train_all)
    print("Unfreeze only:", ', '.join(unfreeze))
    print("Freeze only:", ', '.join(freeze))
    print(f"Max acc: {max_acc:.4f}")
    print(f"Learning rate will decayed every {args.step_size}th epoch")

    model = FaceNetModel(pretrained=pretrain)
    model.to(device)
    triplet_loss = TripletLoss(args.margin).to(device)

    # Layer-freezing policy; later options override earlier ones.
    if fc_only:
        model.unfreeze_only(['fc', 'classifier'])
    if except_fc:
        model.freeze_only(['fc', 'classifier'])
    if train_all:
        model.unfreeze_all()
    if len(unfreeze) > 0:
        model.unfreeze_only(unfreeze)
    if len(freeze) > 0:
        model.freeze_only(freeze)

    # Only parameters that remain trainable are handed to the optimizer.
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=args.learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.step_size,
                                    gamma=0.1)

    if args.load_best or args.load_last:
        checkpoint = './log/best_state.pth' if args.load_best else './log/last_checkpoint.pth'
        print('loading', checkpoint)
        checkpoint = torch.load(checkpoint)
        modelsaver.current_acc = max_acc
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        print("Stepping scheduler")
        try:
            optimizer.load_state_dict(checkpoint['optimizer_state'])
        except ValueError as e:
            # A shape/param-group mismatch is survivable: keep fresh optimizer.
            print("Can't load last optimizer")
            print(e)
        if args.continue_step:
            # Fast-forward the scheduler to the checkpointed epoch.
            scheduler.step(checkpoint['epoch'])
        print(f"Loaded checkpoint epoch: {checkpoint['epoch']}\n"
              f"Loaded checkpoint accuracy: {checkpoint['accuracy']}\n"
              f"Loaded checkpoint loss: {checkpoint['loss']}")

    model = torch.nn.DataParallel(model)

    for epoch in range(start_epoch, args.num_epochs + start_epoch):
        print(80 * '=')
        print('Epoch [{}/{}]'.format(epoch,
                                     args.num_epochs + start_epoch - 1))
        time0 = time.time()
        # Fresh random triplets are sampled for every epoch.
        data_loaders, data_size = get_dataloader(
            args.train_root_dir, args.valid_root_dir, args.train_csv_name,
            args.valid_csv_name, args.num_train_triplets,
            args.num_valid_triplets, args.batch_size, args.num_workers)
        train_valid(model, optimizer, triplet_loss, scheduler, epoch,
                    data_loaders, data_size)
        print(f' Execution time = {time.time() - time0}')
        print(80 * '=')
class Recognition(object):
    """Embedding-based recognizer wrapping FaceNetModel.

    Loads optional checkpoint weights, keeps a pre-allocated input buffer on
    the selected device, and includes a classifier-training experiment on the
    hand-image data under ./data/Hands.
    """

    def __init__(self, weights):
        # `weights`: checkpoint path with a 'state_dict' entry, or None.
        super(Recognition, self).__init__()
        torch.backends.cudnn.benchmark = True
        self.weights = weights
        self.model = FaceNetModel(embedding_size=128, num_classes=10000)
        self.device, device_ids = self._prepare_device([0])
        # self.device = torch.device('cpu')
        self.model = self.model.to(self.device)
        if len(device_ids) > 1:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=device_ids)
        self.transforms = get_transforms(phase='valid', width=224, height=224)
        if self.weights is not None:
            print('Load Checkpoint')
            # map_location keeps CPU-only machines compatible with GPU
            # checkpoints.
            checkpoint = torch.load(weights,
                                    map_location=lambda storage, loc: storage)
            self.model.load_state_dict(checkpoint['state_dict'])
        self.model.eval()
        # init
        # Reusable input buffer; process() copies each image into it.
        self.image = torch.FloatTensor(1, 3, 224, 224)
        self.image = self.image.to(self.device)

    def process(self, images):
        """Transform one image, copy it into the device buffer, run the model."""
        cpu_image = self.transforms(image=images)['image'].unsqueeze(0)
        self.loadData(self.image, cpu_image)
        output = self.model(self.image)
        return output

    def train_cls(self):
        """Fit an SVM on embeddings of ./data/Hands images; print val accuracy.

        Reads image names and ids from ./data/HandInfo.csv, uses a stratified
        90/10 train/val split.  The kNN classifier is built but unused (the
        fit call is commented out).
        """
        knn_clf = neighbors.KNeighborsClassifier(n_neighbors=3,
                                                 weights='distance')
        clf = SVC(gamma='auto')
        df = pd.read_csv('./data/HandInfo.csv')
        df = df.reset_index(drop=True)
        X = []
        y = []
        train_df, val_df = train_test_split(df,
                                            test_size=0.1,
                                            stratify=df['id'])
        train_df = train_df.reset_index(drop=True)
        val_df = val_df.reset_index(drop=True)
        for idx in tqdm(range(len(train_df))):
            path = os.path.join('./data/Hands', train_df.loc[idx,
                                                             'imageName'])
            feature = np.squeeze(
                self.process(cv2.imread(path)).cpu().data.numpy())
            X.append(feature)
            y.append(train_df.loc[idx, 'id'])
        # knn_clf.fit(X, y)
        clf.fit(X, y)
        correct = 0
        total = 0
        for idx in tqdm(range(len(val_df))):
            path = os.path.join('./data/Hands', val_df.loc[idx, 'imageName'])
            feature = self.process(cv2.imread(path)).cpu().data.numpy()
            pred = clf.predict(feature)
            lab = val_df.loc[idx, 'id']
            if pred[0] == lab:
                correct += 1
            total += 1
        # Validation accuracy in percent.
        print(correct * 100 / total)

    @staticmethod
    def loadData(v, data):
        # In-place copy into the pre-allocated buffer, resizing it to fit.
        with torch.no_grad():
            v.resize_(data.size()).copy_(data)

    @staticmethod
    def _prepare_device(device):
        """Resolve a torch device and GPU id list.

        `device` is either an int (number of GPUs to use) or a list of GPU
        ids.  Falls back to CPU (with a warning) when fewer GPUs are
        available than requested.  Returns (torch.device, list_of_ids).
        """
        if type(device) == int:
            n_gpu_use = device
        else:
            n_gpu_use = len(device)
        n_gpu = torch.cuda.device_count()
        if n_gpu_use > 0 and n_gpu == 0:
            print(
                "Warning: There\'s no GPU available on this machine, training will be performed on CPU."
            )
            n_gpu_use = 0
        if n_gpu_use > n_gpu:
            print(
                "Warning: The number of GPU\'s configured to use is {}, but only {} are available on this machine."
                .format(n_gpu_use, n_gpu))
            n_gpu_use = n_gpu
        if type(device) == int:
            device = torch.device('cuda:0' if n_gpu_use > 0 else 'cpu')
            list_ids = list(range(n_gpu_use))
        elif len(device) == 1:
            list_ids = device
            # A single out-of-range or negative id falls back to CPU.
            if device[0] >= 0 and device[0] < n_gpu:
                device = torch.device('cuda:{}'.format(device[0]))
            else:
                device = torch.device('cpu')
        else:
            list_ids = device
            device = torch.device(
                'cuda:{}'.format(device[0]) if n_gpu_use > 0 else 'cpu')
        return device, list_ids
import torch
import torchvision
from models import FaceNetModel

# Export a trained FaceNet checkpoint to ONNX (CPU, 1x3x96x96 input).
device = torch.device('cpu')
model = FaceNetModel(64, 40000).to(device)
checkpoint = torch.load('C:/a/facenet-light/log/checkpoint_epoch299.pth')
model.load_state_dict(checkpoint['state_dict'])

#prune last layer: norm as not supported by onnx->opencv
# Monkey-patch l2_norm to identity so the exported graph omits it; callers
# of the ONNX model must normalize the output themselves if needed.
model.l2_norm = lambda x: x
model.eval()

dummy_input = torch.randn(1, 3, 96, 96, device='cpu')
input_names = ['in']
output_names = ['out']
torch.onnx.export(model,
                  dummy_input,
                  "output9.onnx",
                  verbose=True,
                  input_names=input_names,
                  output_names=output_names)
def __init__(self, model_path, threshold=1.1):
    """Initialize the matcher.

    Args:
        model_path: path handed (as str) to FaceNetModel.
        threshold: distance threshold used by the matcher (default 1.1).
    """
    # Independent attribute setup; order is not significant.
    self.threshold = threshold
    self.cameras = []
    self.maigo_db = MaigoDataBase()
    self.model = FaceNetModel(str(model_path))