async def start_handler(msg: types.Message):
    # Admins get the admin panel instead of the regular greeting.
    if msg.from_user.id in ADMINS:
        return await msg.reply('Админ панель:', reply_markup=kb.admin_panel)  # "Admin panel:"

    logger.info(msg.get_args())
    # Resolve the deep-link payload; fall back to the 'Direct' traffic tag.
    args = decode_start_command(msg.get_args(), 'Direct')
    logger.info(args)
    db.users.set_user_tag(msg.from_user.id, args)

    # "To talk to support, write a message"
    await msg.reply('Для разговора с техподдержкой напишите сообщение')
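# Note (sketch, not part of the original excerpt): with aiogram 2.x this handler
# would typically be registered for the /start command on the dispatcher, e.g.
# assuming a Dispatcher instance named `dp`:
#
#     dp.register_message_handler(start_handler, commands=['start'])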
def data_download():
    # Skip the download if the corpus has already been extracted.
    if os.path.isdir('./dataset/wav'):
        logger.info("emo-db already exists")
        return
    else:
        logger.info("emo-db downloading")
        # Fetch the Emo-DB archive and unpack it into ./dataset.
        r = requests.get('http://emodb.bilderbar.info/download/download.zip')
        z = zipfile.ZipFile(io.BytesIO(r.content))
        z.extractall('./dataset')
        # Remove parts of the corpus that are not needed here.
        shutil.rmtree('./dataset/lablaut')
        shutil.rmtree('./dataset/labsilb')
        shutil.rmtree('./dataset/silb')
        os.remove('./dataset/erkennung.txt')
        os.remove('./dataset/erklaerung.txt')
def evaluate(model, dataloader, queue, criterion, device):
    logger.info('evaluate() start')
    total_loss = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sent_num = 0

    model.eval()
    with torch.no_grad():
        while True:
            feats, scripts, feat_lengths, script_lengths = queue.get()
            # An empty batch signals that the loader has finished.
            if feats.shape[0] == 0:
                break

            feats = feats.to(device)
            scripts = scripts.to(device)
            # src_len = scripts.size(1)
            target = scripts[:, 1:]

            model.module.flatten_parameters()
            logit = model(feats, feat_lengths, scripts, teacher_forcing_ratio=0.0)
            logit = torch.stack(logit, dim=1).to(device)

            y_hat = logit.max(-1)[1]
            loss = criterion(logit.contiguous().view(-1, logit.size(-1)),
                             target.contiguous().view(-1))
            total_loss += loss.item()
            total_num += sum(feat_lengths)

            # Occasionally print a decoded sample while accumulating CER statistics.
            display = random.randrange(0, 100) == 0
            dist, length = get_distance(target, y_hat, display=display)
            total_dist += dist
            total_length += length
            total_sent_num += target.size(0)

    logger.info('evaluate() completed')
    return total_loss / total_num, total_dist / total_length
def main():
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    parser.add_argument('--hidden_size', type=int, default=512,
                        help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size', type=int, default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--dropout', type=float, default=0.2,
                        help='dropout rate in training (default: 0.2)')
    parser.add_argument('--bidirectional', action='store_true',
                        help='use bidirectional RNN for encoder (default: False)')
    parser.add_argument('--use_attention', action='store_true',
                        help='use attention between encoder-decoder (default: False)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch size in training (default: 32)')
    parser.add_argument('--workers', type=int, default=4,
                        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=10,
                        help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing', type=float, default=0.5,
                        help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len', type=int, default=80,
                        help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda', action='store_true',
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model',
                        help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument('--pause', type=int, default=0)
    parser.add_argument('--log_dir',
                        help='directory for logging, valid in local only')
    parser.add_argument('--patience', type=int,
                        help='patience before early stopping (default to None)')
    parser.add_argument('--weight_decay', type=float, default=0,
                        help='weight for L2 regularization')
    parser.add_argument('--save_from', type=int, default=0,
                        help='starting epoch to save models')
    parser.add_argument('--load_ckpt', nargs=2,
                        help='session and checkpoint to load')
    parser.add_argument('--transformer_encoder', action='store_true')
    parser.add_argument('--share_params', action='store_true')
    args = parser.parse_args()

    for name, value in args.__dict__.items():
        print('{}:\t{}'.format(name, value))
    print()

    if nsml.IS_ON_NSML:
        args.log_dir = None
    if args.log_dir is not None:
        if not osp.exists(args.log_dir):
            os.makedirs(args.log_dir)
        with open(osp.join(args.log_dir, 'args.txt'), 'w') as f:
            for name, value in args.__dict__.items():
                f.write('{}\t{}\n'.format(name, value))

    char2index, index2char = label_loader.load_label('./hackathon.labels')
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT: defined in loader.py
    feature_size = N_FFT / 2 + 1

    if args.transformer_encoder:
        enc = Encoder(len_max_seq=1248, d_word_vec=257, n_layers=6, n_head=8,
                      d_k=64, d_v=64, d_model=257, d_inner=2048, dropout=0.1,
                      share_params=args.share_params)
    else:
        enc = EncoderRNN(feature_size, args.hidden_size,
                         input_dropout_p=args.dropout, dropout_p=args.dropout,
                         n_layers=args.layer_size,
                         bidirectional=args.bidirectional,
                         rnn_cell='gru', variable_lengths=False)

    dec = DecoderRNN(len(char2index), args.max_len,
                     args.hidden_size * (2 if args.bidirectional else 1),
                     SOS_token, EOS_token,
                     n_layers=args.layer_size, rnn_cell='gru',
                     bidirectional=args.bidirectional,
                     input_dropout_p=args.dropout, dropout_p=args.dropout,
                     use_attention=args.use_attention)

    if args.transformer_encoder:
        model = Seq2SeqTransformerEncoder(enc, dec)
    else:
        model = Seq2seq(enc, dec)
    model.flatten_parameters()

    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=PAD_token).to(device)

    bind_model(model, optimizer)

    if args.load_ckpt is not None:
        nsml.load(session=args.load_ckpt[0], checkpoint=args.load_ckpt[1])

    if args.pause == 1:
        nsml.paused(scope=locals())

    if args.mode != "train":
        return

    data_list = osp.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()

    with open(data_list, 'r') as f:
        for line in f:
            # line: "aaa.wav,aaa.label"
            wav_path, script_path = line.strip().split(',')
            wav_paths.append(osp.join(DATASET_PATH, 'train_data', wav_path))
            script_paths.append(osp.join(DATASET_PATH, 'train_data', script_path))

    cnt_converged = 0
    best_loss = 1e10
    begin_epoch = 0

    # load all target scripts for reducing disk i/o
    target_path = osp.join(DATASET_PATH, 'train_label')
    load_targets(target_path)

    train_batch_num, train_dataset_list, valid_dataset = split_dataset(
        args, wav_paths, script_paths, valid_ratio=0.05)

    logger.info('start')
    train_begin = time.time()

    if args.log_dir is not None:
        train_writer = SummaryWriter(logdir=osp.join(args.log_dir, 'train'))
        valid_writer = SummaryWriter(logdir=osp.join(args.log_dir, 'valid'))
    else:
        train_writer, valid_writer = None, None

    for epoch in range(begin_epoch, args.max_epochs):
        if args.load_ckpt is not None:
            valid_queue = queue.Queue(args.workers * 2)
            valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
            valid_loader.start()

            eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device)
            logger.info('Eval right after model loading (just for checking)')
            logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' % (epoch, eval_loss, eval_cer))

        train_queue = queue.Queue(args.workers * 2)
        train_loader = MultiLoader(train_dataset_list, train_queue, args.batch_size, args.workers)
        train_loader.start()

        train_loss, train_cer = train(model, train_batch_num, train_queue, criterion,
                                      optimizer, device, train_begin, args.workers,
                                      100, args.teacher_forcing)
        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' % (epoch, train_loss, train_cer))
        if args.log_dir is not None:
            train_writer.add_scalar('epoch/loss', train_loss, epoch)
            train_writer.add_scalar('epoch/CER', train_cer, epoch)

        train_loader.join()

        valid_queue = queue.Queue(args.workers * 2)
        valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
        valid_loader.start()

        eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' % (epoch, eval_loss, eval_cer))
        if args.log_dir is not None:
            valid_writer.add_scalar('epoch/loss', eval_loss, epoch)
            valid_writer.add_scalar('epoch/CER', eval_cer, epoch)
            with open(osp.join(args.log_dir, 'loss.txt'), 'a') as f:
                f.write('epoch: {}, train: {:.6f}, valid: {:.6f}\n'.format(
                    epoch, train_loss, eval_loss))
            with open(osp.join(args.log_dir, 'CER.txt'), 'a') as f:
                f.write('epoch: {}, train: {:.6f}, valid: {:.6f}\n'.format(
                    epoch, train_cer, eval_cer))

        valid_loader.join()

        nsml.report(False, step=epoch,
                    train_epoch__loss=train_loss, train_epoch__cer=train_cer,
                    eval__loss=eval_loss, eval__cer=eval_cer)

        if epoch > args.save_from:
            nsml.save(args.save_name + '_e{}'.format(epoch))

        best_model = (eval_loss < best_loss)
        if best_model:
            nsml.save('best')
            best_loss = eval_loss

        if eval_loss > best_loss:
            cnt_converged += 1
            if args.patience is not None and cnt_converged > args.patience:
                break
        else:
            cnt_converged = 0
def train(model, total_batch_size, queue, criterion, optimizer, device, train_begin,
          train_loader_count, print_batch=100, teacher_forcing_ratio=1):
    total_loss = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sent_num = 0
    batch = 0

    model.train()
    logger.info('train() start')

    begin = epoch_begin = time.time()

    while True:
        if queue.empty():
            logger.debug('queue is empty')

        feats, scripts, feat_lengths, script_lengths = queue.get()

        if feats.shape[0] == 0:
            # empty feats means closing one loader
            train_loader_count -= 1
            logger.debug('left train_loader: %d' % (train_loader_count))
            if train_loader_count == 0:
                break
            else:
                continue

        optimizer.zero_grad()

        feats = feats.to(device)
        scripts = scripts.to(device)
        # src_len = scripts.size(1)
        target = scripts[:, 1:]

        model.module.flatten_parameters()
        logit = model(feats, feat_lengths, scripts, teacher_forcing_ratio=teacher_forcing_ratio)
        logit = torch.stack(logit, dim=1).to(device)

        y_hat = logit.max(-1)[1]
        loss = criterion(logit.contiguous().view(-1, logit.size(-1)),
                         target.contiguous().view(-1))
        total_loss += loss.item()
        total_num += sum(feat_lengths)

        display = random.randrange(0, 100) == 0
        dist, length = get_distance(target, y_hat, display=display)
        total_dist += dist
        total_length += length
        total_sent_num += target.size(0)

        loss.backward()
        optimizer.step()

        if batch % print_batch == 0:
            current = time.time()
            elapsed = current - begin
            epoch_elapsed = (current - epoch_begin) / 60.0
            train_elapsed = (current - train_begin) / 3600.0

            logger.info(
                'batch: {:4d}/{:4d}, loss: {:.4f}, cer: {:.2f}, elapsed: {:.2f}s {:.2f}m {:.2f}h'.
                format(batch, total_batch_size, total_loss / total_num,
                       total_dist / total_length, elapsed, epoch_elapsed, train_elapsed))
            begin = time.time()

            nsml.report(False, step=train.cumulative_batch_count,
                        train_step__loss=total_loss / total_num,
                        train_step__cer=total_dist / total_length)

        batch += 1
        train.cumulative_batch_count += 1

    logger.info('train() completed')
    return total_loss / total_num, total_dist / total_length
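# Note (sketch, not part of the original excerpt): train() uses the function
# attribute `train.cumulative_batch_count` as a global step counter for
# nsml.report, so the attribute must be initialized at module level before the
# first call. The entry point below is likewise an assumed convention for this
# script rather than text from the excerpt.
train.cumulative_batch_count = 0

if __name__ == "__main__":
    main()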