Example #1
# aiogram (2.x-style) handler; ADMINS, kb, db, logger and decode_start_command come from project modules not shown here.
from aiogram import types

async def start_handler(msg: types.Message):
    if msg.from_user.id in ADMINS:
        return await msg.reply('Admin panel:', reply_markup=kb.admin_panel)

    logger.info(msg.get_args())
    args = decode_start_command(msg.get_args(), 'Direct')
    logger.info(args)
    db.users.set_user_tag(msg.from_user.id, args)
    await msg.reply('To talk to tech support, just send a message')
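
A minimal sketch of how this handler might be registered, assuming aiogram 2.x and a Dispatcher named dp (the bot setup is not part of the original snippet):

from aiogram import Bot, Dispatcher

bot = Bot(token='<BOT_TOKEN>')  # placeholder token, assumed
dp = Dispatcher(bot)

# The /start deep-link payload is what msg.get_args() reads in the handler above.
dp.register_message_handler(start_handler, commands=['start'])
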
Example #2
# Imports needed by data_download(); logger is assumed to be configured elsewhere in the module.
import io
import os
import shutil
import zipfile

import requests


def data_download():
    if os.path.isdir('./dataset/wav'):
        logger.info("emo-db already exists")
        return
    else:
        logger.info("emo-db downloading")
        r = requests.get('http://emodb.bilderbar.info/download/download.zip')
        z = zipfile.ZipFile(io.BytesIO(r.content))
        z.extractall('./dataset')
        shutil.rmtree('./dataset/lablaut')
        shutil.rmtree('./dataset/labsilb')
        shutil.rmtree('./dataset/silb')
        os.remove('./dataset/erkennung.txt')
        os.remove('./dataset/erklaerung.txt')
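
data_download() extracts the archive without checking the HTTP response; a slightly hardened variant (a sketch, not the original code) could fail fast on network errors instead:

import io
import zipfile

import requests

def download_emodb_checked(url='http://emodb.bilderbar.info/download/download.zip'):
    # Same download as above, but with a timeout and an explicit status check.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall('./dataset')
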
def evaluate(model, dataloader, queue, criterion, device):
    # One pass over validation batches pulled from `queue`; returns
    # (loss averaged over input frames, character error rate).
    logger.info('evaluate() start')
    total_loss = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sent_num = 0

    model.eval()

    with torch.no_grad():
        while True:
            feats, scripts, feat_lengths, script_lengths = queue.get()
            if feats.shape[0] == 0:
                break

            feats = feats.to(device)
            scripts = scripts.to(device)

            # src_len = scripts.size(1)
            target = scripts[:, 1:]

            model.module.flatten_parameters()
            logit = model(feats, feat_lengths, scripts, teacher_forcing_ratio=0.0)

            logit = torch.stack(logit, dim=1).to(device)
            y_hat = logit.max(-1)[1]

            loss = criterion(logit.contiguous().view(-1, logit.size(-1)),
                             target.contiguous().view(-1))
            total_loss += loss.item()
            total_num += sum(feat_lengths)

            display = random.randrange(0, 100) == 0
            dist, length = get_distance(target, y_hat, display=display)
            total_dist += dist
            total_length += length
            total_sent_num += target.size(0)

    logger.info('evaluate() completed')
    return total_loss / total_num, total_dist / total_length
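
Both evaluate() and train() stop consuming once they see a batch with zero rows, so the loader threads presumably push an empty batch as an end-of-data sentinel; a sketch of that assumed convention (BaseDataLoader/MultiLoader are not shown here):

import torch

def put_end_of_data_sentinel(q):
    # feats with zero rows makes feats.shape[0] == 0, which the consumers treat as "no more data".
    q.put((torch.zeros(0), torch.zeros(0), [], []))
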
def main():

    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    parser.add_argument('--hidden_size', type=int, default=512, help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size', type=int, default=3, help='number of layers of model (default: 3)')
    parser.add_argument('--dropout', type=float, default=0.2, help='dropout rate in training (default: 0.2)')
    parser.add_argument('--bidirectional', action='store_true', help='use bidirectional RNN for encoder (default: False)')
    parser.add_argument('--use_attention', action='store_true', help='use attention between encoder-decoder (default: False)')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size in training (default: 32)')
    parser.add_argument('--workers', type=int, default=4, help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=10, help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-04, help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing', type=float, default=0.5, help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len', type=int, default=80, help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda', action='store_true', help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model', help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument('--pause', type=int, default=0)

    parser.add_argument('--log_dir', help='directory for logging, valid in local only')
    parser.add_argument('--patience', type=int, help='patience before early stopping (default to None)')
    parser.add_argument('--weight_decay', type=float, default=0, help='weight for L2 regularization')
    parser.add_argument('--save_from', type=int, default=0, help='starting epoch to save models')
    parser.add_argument('--load_ckpt', nargs=2, help='session and checkpoint to load')

    parser.add_argument('--transformer_encoder', action='store_true')
    parser.add_argument('--share_params', action='store_true')

    args = parser.parse_args()

    for name, value in args.__dict__.items():
        print('{}:\t{}'.format(name, value))
    print()

    if nsml.IS_ON_NSML:
        args.log_dir = None

    if args.log_dir is not None:
        if not osp.exists(args.log_dir):
            os.makedirs(args.log_dir)

        with open(osp.join(args.log_dir, 'args.txt'), 'w') as f:
            for name, value in args.__dict__.items():
                f.write('{}\t{}\n'.format(name, value))

    char2index, index2char = label_loader.load_label('./hackathon.labels')
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT is defined in loader.py; a spectrogram has N_FFT // 2 + 1 frequency bins
    feature_size = N_FFT // 2 + 1

    if args.transformer_encoder:
        enc = Encoder(len_max_seq=1248, d_word_vec=257, n_layers=6, n_head=8, d_k=64, d_v=64,
                      d_model=257, d_inner=2048, dropout=0.1, share_params=args.share_params)
    else:
        enc = EncoderRNN(
            feature_size, args.hidden_size, input_dropout_p=args.dropout, dropout_p=args.dropout,
            n_layers=args.layer_size, bidirectional=args.bidirectional, rnn_cell='gru',
            variable_lengths=False)

    dec = DecoderRNN(
        len(char2index), args.max_len, args.hidden_size * (2 if args.bidirectional else 1),
        SOS_token, EOS_token, n_layers=args.layer_size, rnn_cell='gru',
        bidirectional=args.bidirectional, input_dropout_p=args.dropout, dropout_p=args.dropout,
        use_attention=args.use_attention)

    if args.transformer_encoder:
        model = Seq2SeqTransformerEncoder(enc, dec)
    else:
        model = Seq2seq(enc, dec)
    model.flatten_parameters()

    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=PAD_token).to(device)

    bind_model(model, optimizer)

    if args.load_ckpt is not None:
        nsml.load(session=args.load_ckpt[0], checkpoint=args.load_ckpt[1])

    if args.pause == 1:
        nsml.paused(scope=locals())

    if args.mode != "train":
        return

    data_list = osp.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()

    with open(data_list, 'r') as f:
        for line in f:
            # line: "aaa.wav,aaa.label"

            wav_path, script_path = line.strip().split(',')
            wav_paths.append(osp.join(DATASET_PATH, 'train_data', wav_path))
            script_paths.append(osp.join(DATASET_PATH, 'train_data', script_path))

    cnt_converged = 0
    best_loss = 1e10
    begin_epoch = 0

    # load all target scripts for reducing disk i/o
    target_path = osp.join(DATASET_PATH, 'train_label')
    load_targets(target_path)

    train_batch_num, train_dataset_list, valid_dataset = split_dataset(
        args, wav_paths, script_paths, valid_ratio=0.05)

    logger.info('start')

    train_begin = time.time()

    if args.log_dir is not None:
        train_writer = SummaryWriter(logdir=osp.join(args.log_dir, 'train'))
        valid_writer = SummaryWriter(logdir=osp.join(args.log_dir, 'valid'))
    else:
        train_writer, valid_writer = None, None

    for epoch in range(begin_epoch, args.max_epochs):
        if args.load_ckpt is not None:
            valid_queue = queue.Queue(args.workers * 2)
            valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
            valid_loader.start()

            eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device)
            logger.info('Eval right after model loading (just for checking)')
            logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' % (epoch, eval_loss, eval_cer))

        train_queue = queue.Queue(args.workers * 2)
        train_loader = MultiLoader(train_dataset_list, train_queue, args.batch_size, args.workers)
        train_loader.start()

        train_loss, train_cer = train(model, train_batch_num, train_queue, criterion, optimizer,
                                      device, train_begin, args.workers, 100, args.teacher_forcing)
        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' % (epoch, train_loss, train_cer))
        if args.log_dir is not None:
            train_writer.add_scalar('epoch/loss', train_loss, epoch)
            train_writer.add_scalar('epoch/CER', train_cer, epoch)

        train_loader.join()

        valid_queue = queue.Queue(args.workers * 2)
        valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
        valid_loader.start()

        eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' % (epoch, eval_loss, eval_cer))
        if args.log_dir is not None:
            valid_writer.add_scalar('epoch/loss', eval_loss, epoch)
            valid_writer.add_scalar('epoch/CER', eval_cer, epoch)

            with open(osp.join(args.log_dir, 'loss.txt'), 'a') as f:
                f.write('epoch: {}, train: {:.6f}, valid: {:.6f}\n'.format(epoch, train_loss, eval_loss))
            with open(osp.join(args.log_dir, 'CER.txt'), 'a') as f:
                f.write('epoch: {}, train: {:.6f}, valid: {:.6f}\n'.format(epoch, train_cer, eval_cer))

        valid_loader.join()

        nsml.report(False, step=epoch, train_epoch__loss=train_loss, train_epoch__cer=train_cer,
                    eval__loss=eval_loss, eval__cer=eval_cer)

        if epoch > args.save_from:
            nsml.save(args.save_name + '_e{}'.format(epoch))

        best_model = (eval_loss < best_loss)
        if best_model:
            nsml.save('best')
            best_loss = eval_loss

        if eval_loss > best_loss:
            cnt_converged += 1
            if args.patience is not None and cnt_converged > args.patience:
                break
        else:
            cnt_converged = 0
def train(model, total_batch_size, queue, criterion, optimizer, device, train_begin,
          train_loader_count, print_batch=100, teacher_forcing_ratio=1):
    # One training epoch: consume batches from `queue` until every loader thread has
    # sent its empty-batch sentinel; returns (loss per input frame, character error rate).
    total_loss = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sent_num = 0
    batch = 0

    model.train()

    logger.info('train() start')

    begin = epoch_begin = time.time()

    while True:
        if queue.empty():
            logger.debug('queue is empty')

        feats, scripts, feat_lengths, script_lengths = queue.get()

        if feats.shape[0] == 0:
            # empty feats means closing one loader
            train_loader_count -= 1

            logger.debug('left train_loader: %d' % (train_loader_count))

            if train_loader_count == 0:
                break
            else:
                continue

        optimizer.zero_grad()

        feats = feats.to(device)
        scripts = scripts.to(device)

        # src_len = scripts.size(1)
        target = scripts[:, 1:]

        model.module.flatten_parameters()
        logit = model(feats, feat_lengths, scripts, teacher_forcing_ratio=teacher_forcing_ratio)

        logit = torch.stack(logit, dim=1).to(device)

        y_hat = logit.max(-1)[1]

        loss = criterion(logit.contiguous().view(-1, logit.size(-1)), target.contiguous().view(-1))
        total_loss += loss.item()
        total_num += sum(feat_lengths)

        display = random.randrange(0, 100) == 0
        dist, length = get_distance(target, y_hat, display=display)
        total_dist += dist
        total_length += length

        total_sent_num += target.size(0)

        loss.backward()
        optimizer.step()

        if batch % print_batch == 0:
            current = time.time()
            elapsed = current - begin
            epoch_elapsed = (current - epoch_begin) / 60.0
            train_elapsed = (current - train_begin) / 3600.0

            logger.info(
                'batch: {:4d}/{:4d}, loss: {:.4f}, cer: {:.2f}, elapsed: {:.2f}s {:.2f}m {:.2f}h'.
                format(batch, total_batch_size, total_loss / total_num, total_dist / total_length,
                       elapsed, epoch_elapsed, train_elapsed))
            begin = time.time()

            nsml.report(False, step=train.cumulative_batch_count,
                        train_step__loss=total_loss / total_num,
                        train_step__cer=total_dist / total_length)
        batch += 1
        train.cumulative_batch_count += 1

    logger.info('train() completed')
    return total_loss / total_num, total_dist / total_length
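
train() increments train.cumulative_batch_count but never initializes it, so the module presumably sets the attribute once after the function definition; together with a standard entry-point guard (both assumed, not shown in the snippet) the script becomes runnable:

train.cumulative_batch_count = 0


if __name__ == '__main__':
    main()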