# Example 1
def predict(video_path,
            weight_path,
            absolute_max_string_len=32,
            output_size=28):
    """Load a video, run LipNet inference on it, and decode the sentence.

    Returns a ``(video, result)`` tuple where ``result`` is the decoded,
    spell-corrected text.
    """
    print("\nLoading data from disk...")
    video = Video(vtype='face', face_predictor_path=FACE_PREDICTOR_PATH)
    # A regular file is read as a video; anything else as a frame directory.
    if os.path.isfile(video_path):
        video.from_video(video_path)
    else:
        video.from_frames(video_path)
    print("Data loaded.\n")

    # Unpack the data shape according to the active Keras backend layout.
    channels_first = K.image_data_format() == 'channels_first'
    if channels_first:
        img_c, frames_n, img_w, img_h = video.data.shape
    else:
        frames_n, img_w, img_h, img_c = video.data.shape

    lipnet = LipNet(img_c=img_c,
                    img_w=img_w,
                    img_h=img_h,
                    frames_n=frames_n,
                    absolute_max_string_len=absolute_max_string_len,
                    output_size=output_size)

    # CTC is computed inside the model graph, so the Keras "loss" just
    # passes the model output straight through.
    lipnet.model.compile(
        loss={'ctc': lambda y_true, y_pred: y_pred},
        optimizer=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08))
    lipnet.model.load_weights(weight_path)

    spell = Spell(path=PREDICT_DICTIONARY)
    decoder = Decoder(greedy=PREDICT_GREEDY,
                      beam_width=PREDICT_BEAM_WIDTH,
                      postprocessors=[labels_to_text, spell.sentence])

    # Single-item batch, normalized to [0, 1] floats.
    batch = np.array([video.data]).astype(np.float32) / 255
    lengths = np.array([len(video.data)])

    predictions = lipnet.predict(batch)
    decoded = decoder.decode(predictions, lengths)[0]

    return (video, decoded)
# Example 2
 def __init__(self, config):
     """Wire up the data pipelines, the LipNet model, loss, and optimizer."""
     self.config = config
     self.device = torch.device(
         'cuda' if torch.cuda.is_available() else 'cpu')

     def build_split(mode):
         # Dataset + loader pair for one split ('train' or 'val').
         data = Dataset(num_of_frame=config.num_of_frame,
                        root=config.data_path,
                        mode=mode)
         loader = get_loader(dataset=data,
                             batch_size=config.batch_size,
                             shuffle=True,
                             drop_last=True)
         return data, loader

     self.train_data, self.train_loader = build_split('train')
     self.val_data, self.val_loader = build_split('val')
     # self.test_data
     self.lipnet = LipNet(config.vocab_size).to(self.device)
     self.ctc_loss = CTCLoss()
     self.optim = torch.optim.Adam(self.lipnet.parameters(),
                                   config.learning_rate)
# Example 3
class Solver():
    """Training harness for LipNet: builds data loaders, runs the CTC
    training loop, and handles checkpointing and learning-rate decay."""

    def __init__(self, config):
        self.config = config
        # Prefer GPU when available; model and inputs follow this device.
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.train_data = Dataset(num_of_frame=config.num_of_frame,
                                  root=config.data_path,
                                  mode='train')
        self.train_loader = get_loader(dataset=self.train_data,
                                       batch_size=config.batch_size,
                                       shuffle=True,
                                       drop_last=True)
        self.val_data = Dataset(num_of_frame=config.num_of_frame,
                                root=config.data_path,
                                mode='val')
        self.val_loader = get_loader(dataset=self.val_data,
                                     batch_size=config.batch_size,
                                     shuffle=True,
                                     drop_last=True)
        # self.test_data
        self.lipnet = LipNet(config.vocab_size).to(self.device)
        self.ctc_loss = CTCLoss()
        self.optim = torch.optim.Adam(self.lipnet.parameters(),
                                      config.learning_rate)

    def fit(self):
        """Train for ``config.epoch`` epochs, saving checkpoints and
        decaying the learning rate as configured."""
        for epoch in range(1, self.config.epoch + 1):
            for step, (frames, labels, frame_lens, label_lens,
                       text) in enumerate(self.train_loader):
                frames = frames.to(self.device)
                output = self.lipnet(frames)
                # CTC loss expects (time, batch, classes), computed on CPU.
                acts = output.permute(1, 0,
                                      2).contiguous().cpu()  # (75, N, 28)
                loss = self.ctc_loss(acts, labels, frame_lens, label_lens)
                loss = loss.mean()

                # Skip NaN / exploded losses instead of stepping on them.
                if not torch.isnan(loss) and -1000000 <= loss <= 1000000:
                    # BUG FIX: clear gradients before backprop; the original
                    # never called zero_grad(), so gradients accumulated
                    # across every step.
                    self.optim.zero_grad()
                    loss.backward()
                    self.optim.step()
                else:
                    print('Skip NaN loss.')
                    continue

                print('Epoch[{}/{}]  Step[{}/{}]  Loss: {:.8f}  LR: {:.8f}'.
                      format(
                          epoch, self.config.epoch, step + 1,
                          len(self.train_data) // self.config.batch_size,
                          loss.item(), self.optim.param_groups[0]['lr']))

            if epoch % self.config.save_every == 0:
                self.save(epoch)

            # Decay every `decay_every` epochs once past `decay_after`.
            if epoch > self.config.decay_after and (
                    epoch -
                    self.config.decay_after) % self.config.decay_every == 1:
                self.update_optim()

            self.validation()
            # validation() leaves the model in eval mode; restore training.
            self.lipnet.train()

    def save(self, epoch):
        """Write weights plus config to ``save_dir/model_<epoch>``."""
        checkpoint = {'net': self.lipnet.state_dict(), 'config': self.config}
        os.makedirs(self.config.save_dir, exist_ok=True)
        output_path = os.path.join(self.config.save_dir,
                                   'model_{}'.format(epoch))
        torch.save(checkpoint, output_path)

    def update_optim(self):
        """Rebuild the optimizer with the LR scaled by ``decay_rate``."""
        old_lr = self.optim.param_groups[0]['lr']
        new_lr = old_lr * self.config.decay_rate
        self.optim = torch.optim.Adam(self.lipnet.parameters(), new_lr)

    def validation(self):
        """Validation stub: currently only switches the model to eval mode."""
        self.lipnet.eval()
        pass
                (loss, wer, cer) = test(model, net)
                print('i_iter={},lr={},loss={},wer={},cer={}'
                    .format(tot_iter,show_lr(optimizer),loss,wer,cer))
                writer.add_scalar('val loss', loss, tot_iter)                    
                writer.add_scalar('wer', wer, tot_iter)
                writer.add_scalar('cer', cer, tot_iter)
                savename = '{}_loss_{}_wer_{}_cer_{}.pt'.format(opt.save_prefix, loss, wer, cer)
                (path, name) = os.path.split(savename)
                if(not os.path.exists(path)): os.makedirs(path)
                torch.save(model.state_dict(), savename)
                if(not opt.is_optimize):
                    exit()
                
if(__name__ == '__main__'):
    # NOTE(review): `opt` is referenced below but never defined or imported
    # in this fragment -- presumably an options/config module; confirm.
    print("Loading options...")
    model = LipNet()
    model = model.cuda()
    # DataParallel wrapper; `model` still owns the unwrapped state_dict.
    net = nn.DataParallel(model).cuda()

    if(hasattr(opt, 'weights')):
        # Partially load pretrained weights: keep only parameters whose
        # name exists in the current model AND whose shape matches.
        pretrained_dict = torch.load(opt.weights)
        model_dict = model.state_dict()
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys() and v.size() == model_dict[k].size()}
        # Parameters of the current model that got no pretrained value.
        missed_params = [k for k, v in model_dict.items() if not k in pretrained_dict.keys()]
        print('loaded params/tot params:{}/{}'.format(len(pretrained_dict),len(model_dict)))
        print('miss matched params:{}'.format(missed_params))
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)

    # NOTE(review): seeding happens AFTER model construction, so weight
    # initialization is not reproducible -- confirm whether that is intended.
    torch.manual_seed(opt.random_seed)
    torch.cuda.manual_seed_all(opt.random_seed)
import torch
import sys
from torch import nn

from model import LipNet
from utils import *


if(__name__ == '__main__'):
    # Dynamically import the options module as the run configuration.
    opt = __import__('options')

    device = f'cuda' if torch.cuda.is_available() else 'cpu'

    print(device)

    model = LipNet()
    model = model.to(device)
    # DataParallel wrapper; `model` still owns the unwrapped state_dict.
    net = nn.DataParallel(model).to(device)

    if(hasattr(opt, 'weights')):
        # map_location lets CUDA-saved checkpoints load on CPU-only hosts.
        pretrained_dict = torch.load(
            opt.weights, map_location=torch.device(device))
        model_dict = model.state_dict()
        # Keep only weights whose name exists in the current model AND
        # whose shape matches.
        pretrained_dict = {k: v for k, v in pretrained_dict.items(
        ) if k in model_dict.keys() and v.size() == model_dict[k].size()}
        # Parameters of the current model that got no pretrained value.
        missed_params = [k for k, v in model_dict.items(
        ) if not k in pretrained_dict.keys()]
        print(
            'loaded params/tot params:{}/{}'.format(len(pretrained_dict), len(model_dict)))
        print('miss matched params:{}'.format(missed_params))
        model_dict.update(pretrained_dict)
        # NOTE(review): model.load_state_dict(model_dict) is never called in
        # this visible span, so the merged weights are not applied -- the
        # fragment appears truncated; confirm against the full source.
# Example 6
            writer.add_scalar('wer', np.array(wer).mean(), i_iter)
            writer.add_scalar('cer', np.array(cer).mean(), i_iter)
            writer.add_scalar('wla', np.array(wla).mean(), i_iter)
            writer.add_scalar('bla', sla, i_iter)

        return np.array(wer).mean(), np.array(cer).mean(), np.array(
            wla).mean(), sla


if __name__ == '__main__':
    print("Loading options...")
    # load model

    isTCN = False
    if not isTCN:
        model = LipNet(isTransformer=opt.isTranformer, isDense=opt.isDense)
    else:
        model = TCNNetwork()

    model = model.cuda()
    net = nn.DataParallel(model).cuda()

    model_dict = model.state_dict()
    # Load the weight files
    pretrained_dict = torch.load(opt.weights)
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict.keys() and v.size() == model_dict[k].size()
    }