def train(all_args, model, tokenizer, train_dataset, valid_dataset, ignore_index):
    """
    Trains GPT2 model and logs necessary details.
        Args:
            all_args: argparse object with train loop settings
            model: gpt2 model
            tokenizer: GPT2 tokenizer
            train_dataset: GPT21024Dataset object with training data
            valid_dataset: dataset handed to evaluate, generate_sample and watch_metrics
                during the periodic validation pass
            ignore_index: token not considered in loss calculation
    """

    # build the data loader from the training dataset
    train_sampler = RandomSampler(train_dataset)
    train_dl = DataLoader(train_dataset, sampler=train_sampler, batch_size=all_args.batch_size,
                          num_workers=all_args.num_workers)
    # set up the loss function, the optimizer and the scheduler
    loss_fct = CrossEntropyLoss(ignore_index=ignore_index)  # ignores padding token for loss calculation
    optimizer = AdamW(model.parameters(), lr=all_args.lr)
    scheduler = get_linear_schedule_with_warmup(optimizer, 100, 80000)

    global_step = 0
    model.zero_grad()
    set_seed(all_args)
    for epoch_number in range(1, all_args.num_train_epochs + 1):
        epoch_iterator = tqdm(train_dl, desc="Training")
        for step, batch in enumerate(epoch_iterator):
            inputs, labels = torch.tensor(batch['article']), torch.tensor(batch['article'])
            inputs = inputs.to(all_args.device)
            labels = labels.to(all_args.device)
            attention_mask = torch.tensor(batch['attention_mask']).to(all_args.device)
            model.train()
            logits = model(inputs, attention_mask=attention_mask)[0]
            # separator-token indices, one per sequence in the batch, shape = (batch,)
            index = batch['sum_idx']
            loss = 0
            for idx, logs, labs in zip(index, logits, labels):
                # only the part of the sequence to the right of the separator contributes to the loss
                shift_logits = logs[idx:-1, :]
                # predictions and labels are offset by one position relative to each other
                shift_labels = labs[idx + 1:]
                loss += loss_fct(shift_logits, shift_labels)
            # the loss is divided by the batch size and by the number of gradient accumulation steps
            loss = loss / all_args.gradient_accumulation_steps / index.shape[0]
            loss.backward()
            if (step + 1) % all_args.gradient_accumulation_steps == 0:
                # Clip once, on the fully accumulated gradient. Clipping after every
                # micro-batch would rescale partially accumulated gradients repeatedly.
                torch.nn.utils.clip_grad_norm_(model.parameters(), all_args.max_grad_norm)
                optimizer.step()    # one optimizer step per gradient_accumulation_steps batches
                scheduler.step()
                model.zero_grad()
                global_step += 1
                print("updates passed: %d\tloss: %f" % (global_step, loss.item()), end='\n\n')

            if (step + 1) % (500 * all_args.gradient_accumulation_steps) == 0:
                # every 500 optimizer steps print summary samples and compute validation metrics
                print('After', global_step + 1, 'updates: ', end='\n\n')
                evaluate(all_args, model, valid_dataset, ignore_index)
                generate_sample(valid_dataset, tokenizer, model, num=2, eval_step=True, device=all_args.device)
                watch_metrics(all_args, model, tokenizer, valid_dataset, num=50, mode='val')

        # save the trained model after every epoch
        new_model_dir = os.path.join(all_args.model_dir, str(epoch_number))
        # makedirs(exist_ok=True) so that a missing parent directory or a re-run into
        # the same model_dir does not crash after a whole epoch of training
        os.makedirs(new_model_dir, exist_ok=True)
        model.save_pretrained(new_model_dir)
Esempio n. 2
0
def train(model, provider, optimizer, criterion, iterations, device, model_comb, scheduler=None):
    """Run a fixed number of training iterations and return the averaged losses.

    Each iteration pulls one batch from ``provider.get_batch()``, converts it to
    ``torch.long`` tensors on ``device``, applies ``log_softmax`` over dim 1 to
    the model output and optimizes ``criterion`` on it.

    Args:
        model: network being trained; called as ``model(data)``.
        provider: object exposing ``get_batch()`` and ``to_midi(sample)``.
        optimizer: optimizer stepped once per iteration.
        criterion: loss callable taking ``(log_probs, targets)``.
        iterations: number of training iterations to run.
        device: device the batch tensors are moved to.
        model_comb: sub-directory name under ``../music/`` for the MIDI samples.
        scheduler: optional learning-rate scheduler, stepped once per iteration.

    Returns:
        List with the mean loss of every completed window of 1000 iterations.
        Losses of a trailing, incomplete window are not included.
    """
    mean_losses = []
    losses = []
    try:
        for iteration in tqdm(range(iterations)):
            data, targets = provider.get_batch()
            data = torch.tensor(data=data, dtype=torch.long).to(device)
            targets = torch.tensor(data=targets, dtype=torch.long).to(device)
            model.zero_grad()

            target_preds = F.log_softmax(model(data), dim=1)

            loss = criterion(target_preds, targets)
            loss.backward()
            optimizer.step()
            # The scheduler must be stepped after the optimizer; stepping it first
            # skips the first value of the learning-rate schedule (PyTorch >= 1.1).
            if scheduler is not None:
                scheduler.step()
            losses.append(loss.cpu().item())
            if len(losses) == 1000:
                mean_losses.append(np.mean(losses))
                losses = []

            # Write a generated sample on iteration 0, every 100000 iterations and on the last one.
            if iteration % 100000 == 0 or iteration == iterations - 1:
                with torch.no_grad():
                    sample = generate_sample(model, provider, device=device, length=100, temperature=1.0)
                    midi = provider.to_midi(sample)
                    midi.write(f'../music/{model_comb}/sample_{iteration}.midi')
    except KeyboardInterrupt:
        # Deliberate: Ctrl-C stops training early and still returns the losses collected so far.
        pass

    return mean_losses
Esempio n. 3
0
def get_mean_hamming(data, test_count=10, size=50):
    """Return the mean Hamming distance between Viterbi paths and sampled state paths.

    For each of ``test_count`` runs a sample of ``size`` is drawn with
    ``generate_sample``, decoded with ``viterbi`` using the probabilities stored
    on ``data``, and the number of positions where the decoded path differs from
    the sampled states is counted.

    Args:
        data: object providing ``start_probability``, ``transition_probability``
            and ``emission_probability``; also passed to ``generate_sample``.
        test_count: number of independent runs to average over.
        size: sample size forwarded to ``generate_sample``.

    Returns:
        Mean of the per-run mismatch counts, or NaN when no run was performed.
    """
    test_results = []
    # ``range`` instead of ``xrange`` so the function also runs on Python 3.
    for _ in range(test_count):
        observations, result_states = generate_sample(data, size=size)
        prob, path = viterbi(observations,
                             data.start_probability,
                             data.transition_probability,
                             data.emission_probability)

        mismatches = sum(0 if a == b else 1 for a, b in zip(path, result_states))
        test_results.append(mismatches)

    if not test_results:
        # Mean of nothing is undefined; return NaN explicitly instead of relying
        # on NumPy's empty-slice warning.
        return float('nan')
    return np.array(test_results).mean()
Esempio n. 4
0
def gen():
    """Call ``generate_sample`` with the module-level training settings and return its result."""
    sample = generate_sample(
        data_tr_sampled,
        tokenizer,
        qids_name_tr,
        voc_size_mnt,
        voc_size_ent,
        BATCH,
        nlines,
    )
    return sample
            loss = loss/args.gradient_accumulation_steps
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1
                writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
                writer.add_scalar('loss', (tr_loss - logging_loss)/args.gradient_accumulation_steps, global_step)
                logging_loss = tr_loss
                print("loss:", loss.item(), end='\n\n')
                if (step + 1)/args.gradient_accumulation_steps == 1.0:
                	print('After 1st update: ', end='\n\n')
                	generate_sample(valid_dataset, tokenizer, num=2, eval_step=False,device=args.device)
                
                
            if (step + 1) % (10*args.gradient_accumulation_steps) == 0:
                results = evaluate(args, model, valid_dataset, ignore_index, global_step)
                for key, value in results.items():
                    writer.add_scalar('eval_{}'.format(key), value, global_step)
                print('After', global_step+1,'updates: ', end='\n\n')
                generate_sample(valid_dataset, tokenizer, num=2, eval_step=True,device=args.device)
                    
     

 def evaluate(args, model, eval_dataset, ignore_index, global_step=None):
 	""" Returns perplexity score on validation dataset.
 		Args:
 			args: dict that contains all the necessary information passed by user while training
Esempio n. 6
0
def train(args, model, tokenizer, train_dataset, valid_dataset, ignore_index):
    """Trains GPT2 model and logs necessary details.

    Args:
        args: dict that contains all the necessary information passed by user while training
        model: finetuned gpt/gpt2 model
        tokenizer: GPT/GPT2 tokenizer
        train_dataset: GPT21024Dataset object for training data
        valid_dataset: dataset passed to evaluate and generate_sample for the periodic checks
        ignore_index: token not considered in loss calculation
    """

    writer = SummaryWriter('./logs')
    train_sampler = RandomSampler(train_dataset)
    train_dl = DataLoader(train_dataset,
                          sampler=train_sampler,
                          batch_size=args.batch_size,
                          num_workers=args.num_workers)
    loss_fct = CrossEntropyLoss(
        ignore_index=ignore_index)  # ignores padding token for loss calculation
    optimizer = AdamW(model.parameters(), lr=args.lr)
    scheduler = WarmupLinearSchedule(optimizer, 100, 80000)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = tnrange(int(args.num_train_epochs), desc="Epoch")
    set_seed(args)
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dl, desc="Training")
        for step, batch in enumerate(epoch_iterator):
            inputs, labels = torch.tensor(batch['article']), torch.tensor(
                batch['article'])
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            model.train()
            logits = model(inputs)[0]
            # index of separator token; .item() only works on a single-element
            # tensor, so this loop requires a batch size of 1
            idx = batch['sum_idx'].item()
            # only consider loss on reference summary just like seq2seq models
            shift_logits = logits[..., idx:-1, :].contiguous()
            shift_labels = labels[..., idx + 1:].contiguous()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
                            shift_labels.view(-1))
            loss = loss / args.gradient_accumulation_steps
            loss.backward()
            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                # Clip once, on the fully accumulated gradient. Clipping after every
                # micro-batch would rescale partially accumulated gradients repeatedly.
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.max_grad_norm)
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1
                writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
                # NOTE(review): tr_loss already accumulates losses divided by
                # gradient_accumulation_steps, so this value is divided twice;
                # left unchanged to keep logged curves comparable - confirm intent.
                writer.add_scalar('loss', (tr_loss - logging_loss) /
                                  args.gradient_accumulation_steps,
                                  global_step)
                logging_loss = tr_loss
                print("loss:", loss.item(), end='\n\n')
                # first optimizer update of the epoch (integer comparison instead
                # of comparing a float quotient against 1.0)
                if step + 1 == args.gradient_accumulation_steps:
                    print('After 1st update: ', end='\n\n')
                    generate_sample(valid_dataset,
                                    tokenizer,
                                    model,
                                    num=2,
                                    eval_step=False)

            if (step + 1) % (10 * args.gradient_accumulation_steps) == 0:
                # every 10 optimizer updates: log validation results and print samples
                results = evaluate(args, model, valid_dataset, ignore_index,
                                   global_step)
                for key, value in results.items():
                    writer.add_scalar('eval_{}'.format(key), value,
                                      global_step)
                print('After', global_step + 1, 'updates: ', end='\n\n')
                generate_sample(valid_dataset,
                                tokenizer,
                                model,
                                num=2,
                                eval_step=True)
            loss = loss/args.gradient_accumulation_steps
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1
                writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
                writer.add_scalar('loss', (tr_loss - logging_loss)/args.gradient_accumulation_steps, global_step)
                logging_loss = tr_loss
                print("loss:", loss.item(), end='\n\n')
                if (step + 1)/args.gradient_accumulation_steps == 1.0:
                	print('After 1st update: ', end='\n\n')
                	generate_sample(valid_dataset, tokenizer, num=2, eval_step=False)
                
                
            if (step + 1) % (10*args.gradient_accumulation_steps) == 0:
                results = evaluate(args, model, valid_dataset, ignore_index, global_step)
                for key, value in results.items():
                    writer.add_scalar('eval_{}'.format(key), value, global_step)
                print('After', global_step+1,'updates: ', end='\n\n')
                generate_sample(valid_dataset, tokenizer, num=2, eval_step=True)
                    
     

 def evaluate(args, model, eval_dataset, ignore_index, global_step=None):
 	""" Returns perplexity score on validation dataset.
 		Args:
 			args: dict that contains all the necessary information passed by user while training
        'volume', 'numberOfTrades', 'var_ema', 'var_bollinger', 'var_stoch',
        'RSI'
    ]
    categorical_columns = [
        'rsi_indicator', 'stoch_indicator', 'CDL2CROWS', 'CDL3BLACKCROWS',
        'CDL3INSIDE', 'CDL3LINESTRIKE', 'CDL3OUTSIDE', 'CDL3STARSINSOUTH',
        'CDL3WHITESOLDIERS', 'CDLABANDONEDBABY', 'CDLADVANCEBLOCK',
        'CDLBELTHOLD', 'CDLBREAKAWAY', 'CDLCLOSINGMARUBOZU',
        'CDLCONCEALBABYSWALL', 'CDLCOUNTERATTACK', 'CDLDARKCLOUDCOVER',
        'CDLDOJI', 'CDLDOJISTAR', 'CDLDRAGONFLYDOJI', 'CDLENGULFING',
        'CDLEVENINGDOJISTAR', 'CDLEVENINGSTAR', 'CDLGAPSIDESIDEWHITE',
        'CDLGRAVESTONEDOJI', 'CDLHAMMER', 'CDLHANGINGMAN', 'CDLHARAMI',
        'CDLHARAMICROSS', 'CDLHIGHWAVE', 'CDLHIKKAKE', 'CDLHIKKAKEMOD',
        'CDLHOMINGPIGEON', 'CDLIDENTICAL3CROWS', 'CDLINNECK',
        'CDLINVERTEDHAMMER', 'CDLKICKING', 'CDLKICKINGBYLENGTH',
        'CDLLADDERBOTTOM', 'CDLLONGLEGGEDDOJI', 'CDLLONGLINE', 'CDLMARUBOZU',
        'CDLMATCHINGLOW', 'CDLMATHOLD', 'CDLMORNINGDOJISTAR', 'CDLMORNINGSTAR',
        'CDLONNECK', 'CDLPIERCING', 'CDLRICKSHAWMAN', 'CDLRISEFALL3METHODS',
        'CDLSEPARATINGLINES', 'CDLSHOOTINGSTAR', 'CDLSHORTLINE',
        'CDLSPINNINGTOP', 'CDLSTALLEDPATTERN', 'CDLSTICKSANDWICH', 'CDLTAKURI',
        'CDLTASUKIGAP', 'CDLTHRUSTING', 'CDLTRISTAR', 'CDLUNIQUE3RIVER',
        'CDLUPSIDEGAP2CROWS', 'CDLXSIDEGAP3METHODS'
    ]
    model = RandomForestClassifier(n_estimators=60,
                                   max_depth=7,
                                   min_samples_leaf=5)

    SAMPLE_SIZE = utils.generate_sample(df_staging, df_master)
    utils.check_distribution(SAMPLE_SIZE, df_staging, df_master, mode, model,
                             continuous_columns, categorical_columns)
Esempio n. 9
0
def run_on_test_data(data, size=10):
    """Draw a sample from ``data`` and run ``run_on_seq`` on its observations.

    Returns a pair ``(run_on_seq(observations), observations)``; the second
    element returned by ``generate_sample`` is discarded.
    """
    observations, _unused = generate_sample(data, size=size)
    outcome = run_on_seq(observations)
    return outcome, observations
Esempio n. 10
0
# Keep a copy of the options module next to the training results.
_opt_copy_path = os.path.join(result, 'opt.py')
shutil.copy('opt.py', _opt_copy_path)

# Build both networks and move them to the selected GPU when one is requested.
generator = Generator(opt.n_charactor)
discriminator = Discriminator()
if args.gpu >= 0:
    chainer.cuda.get_device_from_id(args.gpu).use()
    for _net in (generator, discriminator):
        _net.to_gpu()

# One Adam optimizer per network, each with gradient clipping and weight decay.
opt_g = set_opt(
    generator,
    chainer.optimizers.Adam(opt.lr_gen, beta1=0.5),
    chainer.optimizer.GradientClipping(10),
    chainer.optimizer.WeightDecay(0.0001),
)
opt_d = set_opt(
    discriminator,
    chainer.optimizers.Adam(opt.lr_dis, beta1=0.5),
    chainer.optimizer.GradientClipping(10),
    chainer.optimizer.WeightDecay(0.0001),
)

# Wire the updater into a trainer that writes to the result directory.
updater = DFGUpdater(opt_g, opt_d, train_iter, args.gpu)
trainer = chainer.training.Trainer(updater, opt.trigger, out=result)

# Reporting extensions fire every 100 iterations; then start training.
_report_trigger = (100, 'iteration')
_report_keys = [
    'iteration', 'loss/recon', 'loss/h', 'loss/gan/gen', 'loss/gan/dis'
]
trainer.extend(extensions.LogReport(trigger=_report_trigger))
trainer.extend(extensions.PrintReport(_report_keys), trigger=_report_trigger)
trainer.extend(extensions.ProgressBar(update_interval=100))
trainer.extend(generate_sample(generator, valid_iter, args.gpu))
trainer.run()