def predict(video_path, weight_path, absolute_max_string_len=32, output_size=28):
    """Run LipNet inference on a single video.

    Loads the video (file or frame directory), builds a LipNet matching the
    video's tensor layout, restores the given weights, and beam/greedy-decodes
    the CTC output into text.

    Returns:
        (video, result): the loaded Video object and the decoded sentence.
    """
    print("\nLoading data from disk...")
    video = Video(vtype='face', face_predictor_path=FACE_PREDICTOR_PATH)
    # Accept either a single video file or a directory of extracted frames.
    if os.path.isfile(video_path):
        video.from_video(video_path)
    else:
        video.from_frames(video_path)
    print("Data loaded.\n")

    # Unpack the data shape according to the backend's channel ordering.
    if K.image_data_format() == 'channels_first':
        img_c, frames_n, img_w, img_h = video.data.shape
    else:
        frames_n, img_w, img_h, img_c = video.data.shape

    lipnet = LipNet(img_c=img_c, img_w=img_w, img_h=img_h, frames_n=frames_n,
                    absolute_max_string_len=absolute_max_string_len,
                    output_size=output_size)
    optimizer = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    # The CTC loss is computed inside the model graph, so the compiled loss
    # simply forwards y_pred.
    lipnet.model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                         optimizer=optimizer)
    lipnet.model.load_weights(weight_path)

    # Decoder post-processing: raw labels -> text -> spell-corrected sentence.
    spell = Spell(path=PREDICT_DICTIONARY)
    decoder = Decoder(greedy=PREDICT_GREEDY, beam_width=PREDICT_BEAM_WIDTH,
                      postprocessors=[labels_to_text, spell.sentence])

    # Normalise pixel values to [0, 1] and add the batch dimension.
    batch = np.array([video.data]).astype(np.float32) / 255
    input_length = np.array([len(video.data)])
    y_pred = lipnet.predict(batch)
    result = decoder.decode(y_pred, input_length)[0]
    return (video, result)
def __init__(self, config):
    """Wire up datasets, loaders, model, CTC criterion and optimizer from *config*."""
    self.config = config
    # Prefer GPU when one is available.
    use_cuda = torch.cuda.is_available()
    self.device = torch.device('cuda' if use_cuda else 'cpu')

    # Training split and its loader.
    self.train_data = Dataset(num_of_frame=config.num_of_frame,
                              root=config.data_path, mode='train')
    self.train_loader = get_loader(dataset=self.train_data,
                                   batch_size=config.batch_size,
                                   shuffle=True, drop_last=True)

    # Validation split and its loader.
    self.val_data = Dataset(num_of_frame=config.num_of_frame,
                            root=config.data_path, mode='val')
    self.val_loader = get_loader(dataset=self.val_data,
                                 batch_size=config.batch_size,
                                 shuffle=True, drop_last=True)
    # self.test_data

    # Model, CTC criterion and Adam optimizer.
    self.lipnet = LipNet(config.vocab_size).to(self.device)
    self.ctc_loss = CTCLoss()
    self.optim = torch.optim.Adam(self.lipnet.parameters(), config.learning_rate)
class Solver():
    """Training driver for LipNet: data loaders, model, CTC loss and Adam."""

    def __init__(self, config):
        """Build datasets, loaders, model, criterion and optimizer from *config*."""
        self.config = config
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.train_data = Dataset(num_of_frame=config.num_of_frame,
                                  root=config.data_path, mode='train')
        self.train_loader = get_loader(dataset=self.train_data,
                                       batch_size=config.batch_size,
                                       shuffle=True, drop_last=True)
        self.val_data = Dataset(num_of_frame=config.num_of_frame,
                                root=config.data_path, mode='val')
        self.val_loader = get_loader(dataset=self.val_data,
                                     batch_size=config.batch_size,
                                     shuffle=True, drop_last=True)
        # self.test_data
        self.lipnet = LipNet(config.vocab_size).to(self.device)
        self.ctc_loss = CTCLoss()
        self.optim = torch.optim.Adam(self.lipnet.parameters(), config.learning_rate)

    def fit(self):
        """Run the training loop: per-step CTC loss, per-epoch checkpointing,
        LR decay and validation, as dictated by self.config."""
        for epoch in range(1, self.config.epoch + 1):
            for step, (frames, labels, frame_lens, label_lens, text) in enumerate(self.train_loader):
                frames = frames.to(self.device)
                output = self.lipnet(frames)
                # CTC expects (T, N, C); lengths/targets stay on CPU for the loss.
                acts = output.permute(1, 0, 2).contiguous().cpu()  # (75, N, 28)
                loss = self.ctc_loss(acts, labels, frame_lens, label_lens)
                loss = loss.mean()
                # Skip NaN / exploding losses instead of corrupting the weights.
                if not torch.isnan(loss) and -1000000 <= loss <= 1000000:
                    # BUG FIX: gradients must be cleared before each backward
                    # pass; the original accumulated them across steps.
                    self.optim.zero_grad()
                    loss.backward()
                    self.optim.step()
                else:
                    print('Skip NaN loss.')
                    continue
                print('Epoch[{}/{}] Step[{}/{}] Loss: {:.8f} LR: {:.8f}'.format(
                    epoch, self.config.epoch, step + 1,
                    self.train_data.__len__() // self.config.batch_size,
                    loss.item(), self.optim.param_groups[0]['lr']))
            if epoch % self.config.save_every == 0:
                self.save(epoch)
            # Decay LR every decay_every epochs once past decay_after.
            if epoch > self.config.decay_after and (
                    epoch - self.config.decay_after) % self.config.decay_every == 1:
                self.update_optim()
            self.validation()
            # validation() switches to eval mode; restore train mode.
            self.lipnet.train()

    def save(self, epoch):
        """Checkpoint model weights and config to save_dir/model_<epoch>."""
        checkpoint = {'net': self.lipnet.state_dict(), 'config': self.config}
        os.makedirs(self.config.save_dir, exist_ok=True)
        output_path = os.path.join(self.config.save_dir, 'model_{}'.format(epoch))
        torch.save(checkpoint, output_path)

    def update_optim(self):
        """Multiply the learning rate by config.decay_rate (rebuilds Adam)."""
        old_lr = self.optim.param_groups[0]['lr']
        new_lr = old_lr * self.config.decay_rate
        self.optim = torch.optim.Adam(self.lipnet.parameters(), new_lr)

    def validation(self):
        """Switch to eval mode; metric computation not implemented yet."""
        self.lipnet.eval()
        pass
# NOTE(review): this chunk begins mid-function — the lines below look like the
# periodic-evaluation / checkpoint tail of a training loop whose enclosing
# `def` is not visible here; indentation is reconstructed and should be
# confirmed against the full file.
        # Evaluate, log to TensorBoard, and checkpoint under a metric-tagged name.
        (loss, wer, cer) = test(model, net)
        print('i_iter={},lr={},loss={},wer={},cer={}'
              .format(tot_iter, show_lr(optimizer), loss, wer, cer))
        writer.add_scalar('val loss', loss, tot_iter)
        writer.add_scalar('wer', wer, tot_iter)
        writer.add_scalar('cer', cer, tot_iter)
        savename = '{}_loss_{}_wer_{}_cer_{}.pt'.format(opt.save_prefix, loss, wer, cer)
        (path, name) = os.path.split(savename)
        if(not os.path.exists(path)):
            os.makedirs(path)
        torch.save(model.state_dict(), savename)
        # Test-only mode: stop after the first evaluation.
        if(not opt.is_optimize):
            exit()


# Script entry point: build LipNet, wrap in DataParallel, optionally
# warm-start from a checkpoint whose tensor names and shapes match.
if(__name__ == '__main__'):
    print("Loading options...")
    model = LipNet()
    model = model.cuda()
    net = nn.DataParallel(model).cuda()
    if(hasattr(opt, 'weights')):
        pretrained_dict = torch.load(opt.weights)
        model_dict = model.state_dict()
        # Keep only checkpoint entries present in the model with identical shapes.
        pretrained_dict = {k: v for k, v in pretrained_dict.items()
                           if k in model_dict.keys() and v.size() == model_dict[k].size()}
        missed_params = [k for k, v in model_dict.items()
                         if not k in pretrained_dict.keys()]
        print('loaded params/tot params:{}/{}'.format(len(pretrained_dict), len(model_dict)))
        print('miss matched params:{}'.format(missed_params))
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
    # Seed CPU and all GPUs for reproducibility.
    torch.manual_seed(opt.random_seed)
    torch.cuda.manual_seed_all(opt.random_seed)
import torch
import sys
from torch import nn
from model import LipNet
from utils import *

# Script entry point: build LipNet, wrap it in DataParallel, and optionally
# warm-start from a checkpoint whose tensor names/shapes match the model.
# NOTE(review): this chunk appears truncated — model_dict is updated with the
# pretrained weights but no load_state_dict call is visible here; confirm the
# remainder of the file applies it.
if(__name__ == '__main__'):
    opt = __import__('options')  # run configuration lives in options.py
    device = f'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    model = LipNet()
    model = model.to(device)
    net = nn.DataParallel(model).to(device)
    if(hasattr(opt, 'weights')):
        # Map checkpoint tensors onto the active device (CPU-safe).
        pretrained_dict = torch.load(
            opt.weights, map_location=torch.device(device))
        model_dict = model.state_dict()
        # Keep only checkpoint entries present in the model with identical shapes.
        pretrained_dict = {k: v for k, v in pretrained_dict.items(
        ) if k in model_dict.keys() and v.size() == model_dict[k].size()}
        missed_params = [k for k, v in model_dict.items(
        ) if not k in pretrained_dict.keys()]
        print(
            'loaded params/tot params:{}/{}'.format(len(pretrained_dict), len(model_dict)))
        print('miss matched params:{}'.format(missed_params))
        model_dict.update(pretrained_dict)
# NOTE(review): this chunk begins mid-function — the lines below look like the
# TensorBoard logging tail and return of an evaluation routine whose enclosing
# `def` is not visible here; indentation is reconstructed.
    writer.add_scalar('wer', np.array(wer).mean(), i_iter)
    writer.add_scalar('cer', np.array(cer).mean(), i_iter)
    writer.add_scalar('wla', np.array(wla).mean(), i_iter)
    writer.add_scalar('bla', sla, i_iter)
    # Returns mean word error rate, char error rate, word-level accuracy,
    # and sentence-level accuracy (sla) for this evaluation pass.
    return np.array(wer).mean(), np.array(cer).mean(), np.array(
        wla).mean(), sla


if __name__ == '__main__':
    print("Loading options...")
    # load model
    isTCN = False  # hard-coded switch between LipNet and the TCN variant
    if not isTCN:
        model = LipNet(isTransformer=opt.isTranformer, isDense=opt.isDense)
    else:
        model = TCNNetwork()
    model = model.cuda()
    net = nn.DataParallel(model).cuda()
    model_dict = model.state_dict()
    # Load the weight files
    pretrained_dict = torch.load(opt.weights)
    # Keep only checkpoint entries present in the model with identical shapes.
    # NOTE(review): chunk ends here — the filtered dict is built but not yet
    # applied to the model within the visible span; confirm load_state_dict
    # follows in the full file.
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict.keys() and v.size() == model_dict[k].size()
    }