Example #1
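# Convert a batch to padded input/label tensors, build an RNN-Transducer,
# randomly initialize it, and set up an SGD optimizer.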
xs = [np2tensor(x).float() for x in batch['xs']]
xlens = torch.IntTensor([len(x) for x in batch['xs']])
xs = pad_list(xs, 0.0)
ys = batch['ys']
_ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1)
       for y in ys]  # TODO(vishay): optimize for GPU
ys_out_pad = pad_list(_ys, 0).long()
ylens = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))
# TODO use config file
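# Build the Transducer model and initialize all weights uniformly in [-0.1, 0.1].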
model = Transducer(81, vocab, 256, 3, args.dropout, bidirectional=args.bi)
print(model)
for param in model.parameters():
    torch.nn.init.uniform_(param, -0.1, 0.1)
if args.init:
    model.load_state_dict(torch.load(args.init))
if args.initam:
    model.encoder.load_state_dict(torch.load(args.initam))
if args.cuda:
    model.cuda()

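# Plain SGD with momentum over the trainable parameters only.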
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                   model.parameters()),
                            lr=args.lr,
                            momentum=.9)

# data set
# trainset = SequentialLoader('train', args.batch_size)
# devset = SequentialLoader('dev', args.batch_size)


# model = EncoderDecoder(vocab)
def removeDuplicates(S):
    S = list(S)
    n = len(S)
Example #2
                   lm_checkpoint='exp/lm.bin')

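# Build the train/validation datasets from transcript lists, then filter them
# by the encoder's convolutional front-end and by maximum length.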
train = AudioDataset(
    '/media/lytic/STORE/ru_open_stt_wav/public_youtube1120_hq.txt', labels)
test = AudioDataset(
    '/media/lytic/STORE/ru_open_stt_wav/public_youtube700_val.txt', labels)

train.filter_by_conv(model.encoder.conv)
train.filter_by_length(400)

test.filter_by_conv(model.encoder.conv)
test.filter_by_length(200)

optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-5)

model.cuda()

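# Bucket training utterances into batches of 32 and wrap both sets in
# DataLoaders using the RNN-T collate function.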
sampler = BucketingSampler(train, 32)

train = DataLoader(train,
                   pin_memory=True,
                   num_workers=4,
                   collate_fn=collate_fn_rnnt,
                   batch_sampler=sampler)
test = DataLoader(test,
                  pin_memory=True,
                  num_workers=4,
                  collate_fn=collate_fn_rnnt,
                  batch_size=16)

for epoch in range(10):