Esempio n. 1
0
   def configure_optimizers(self):
      """Build the Ranger optimizer (with selective weight decay) and a
      constant LR schedule stepped once per optimizer step."""
      named = list(self.model.named_parameters())
      # Parameters whose name contains one of these tags get no decay.
      exempt_tags = ["bias", "LayerNorm.weight"]
      decayed = {
          "params": [p for n, p in named
                     if all(tag not in n for tag in exempt_tags)],
          "weight_decay": self.hparams.weight_decay,
      }
      undecayed = {
          "params": [p for n, p in named
                     if any(tag in n for tag in exempt_tags)],
          "weight_decay": 0.0,
      }
      # Ranger (RAdam + Lookahead) works well for this task; plain AdamW
      # from Transformers also works but needs warmup, and RAdam alone is
      # roughly on par with Ranger. The best conditions seem to be
      # learning rate 1e-4 with gradient accumulation over 2 batches.
      optimizer = ranger.Ranger([decayed, undecayed],
            lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon)

      # The constant schedule is a no-op placeholder. Replace with
      # another scheduler if required.
      lr_config = {
          'scheduler': transformers.get_constant_schedule(optimizer),
          'interval': 'step',
          'frequency': 1
      }
      return [optimizer], [lr_config]
Esempio n. 2
0
 def configure_optimizers(self):
     """Configure Ranger with per-group learning rates plus StepLR decay.

     The output layer trains at a tenth of the base LR; the input-layer
     group carries a 'gc_dim' entry (consumed by Ranger's gradient
     centralization).
     """
     base_lr = 1e-3
     hidden_group = {
         'params':
         self.get_layers(
             lambda x: self.output != x and self.input != x),
         'lr':
         base_lr
     }
     input_group = {
         'params': self.get_layers(lambda x: self.input == x),
         'lr': base_lr,
         'gc_dim': 0
     }
     output_group = {
         'params': self.get_layers(lambda x: self.output == x),
         'lr': base_lr / 10
     }
     # Increasing the eps leads to less saturated nets with a few dead
     # neurons.
     optimizer = ranger.Ranger([hidden_group, input_group, output_group],
                               betas=(.9, 0.999), eps=1.0e-7)
     # Multiply every group's LR by 0.3 after each 75-epoch step.
     scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                 step_size=75,
                                                 gamma=0.3)
     return [optimizer], [scheduler]
Esempio n. 3
0
 def configure_optimizers(self):
     """Configure Ranger (gradient centralization disabled) with an
     exponential-style per-epoch LR decay.

     Every parameter group uses the base LR except the output layer's
     weight and bias, which train at a tenth of it; the input group
     carries a 'gc_dim' entry for Ranger.
     """
     base_lr = 1.5e-3
     stacks = self.layer_stacks
     train_params = [{
         'params': get_parameters([self.input]),
         'lr': base_lr,
         'gc_dim': 0
     }]
     # Hidden-layer tensors all train at the full base LR.
     for tensor in (stacks.l1_fact.weight, stacks.l1.weight,
                    stacks.l1.bias, stacks.l2.weight, stacks.l2.bias):
         train_params.append({'params': [tensor], 'lr': base_lr})
     # The output layer trains ten times slower.
     for tensor in (stacks.output.weight, stacks.output.bias):
         train_params.append({'params': [tensor], 'lr': base_lr / 10})
     # Increasing the eps leads to less saturated nets with a few dead
     # neurons.
     optimizer = ranger.Ranger(train_params,
                               betas=(.9, 0.999),
                               eps=1.0e-7,
                               gc_loc=False,
                               use_gc=False)
     # Multiply the LR by 0.987 after every epoch (step_size=1).
     scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                 step_size=1,
                                                 gamma=0.987)
     return [optimizer], [scheduler]
Esempio n. 4
0
  def configure_optimizers(self):
    """Build Ranger (gradient centralization off) with cosine
    warm-restart LR scheduling.

    The output layer's weight and bias train at a tenth of the base LR;
    all other groups use the full rate, and the input group carries a
    'gc_dim' entry for Ranger.
    """
    base_lr = 1.5e-3
    stacks = self.layer_stacks
    groups = [{'params': get_parameters([self.input]), 'lr': base_lr, 'gc_dim': 0}]
    for tensor in (stacks.l1_fact.weight, stacks.l1.weight, stacks.l1.bias,
                   stacks.l2.weight, stacks.l2.bias):
      groups.append({'params': [tensor], 'lr': base_lr})
    for tensor in (stacks.output.weight, stacks.output.bias):
      groups.append({'params': [tensor], 'lr': base_lr / 10})
    # Increasing the eps leads to less saturated nets with a few dead neurons.
    optimizer = ranger.Ranger(groups, betas=(.9, 0.999), eps=1.0e-7,
                              gc_loc=False, use_gc=False)
    # Restart the cosine annealing cycle every 100 epochs.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=100, verbose=True)

    return [optimizer], [scheduler]
Esempio n. 5
0
        self.model = models.resnet101(pretrained=False)
        self.model.fc = nn.Linear(2048, 10)

    def forward(self, x):
        """Run the wrapped ResNet-101 classifier on a batch and return
        its raw (pre-softmax) class scores."""
        return self.model(x)


# In[8]:

import ranger

# Build a fresh model, loss, and Ranger optimizer with a cosine-annealed LR.
# NOTE(review): Net, device, LR, and EPOCH come from earlier notebook cells.
net = Net()
net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = ranger.Ranger(net.parameters(), lr=LR, eps=1e-6)
# T_max is EPOCH + 20 — longer than the EPOCH-epoch loop below — so the LR
# never fully decays to its minimum during training.
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, EPOCH + 20)

# Per-epoch histories of loss/accuracy and the LR trace, plus running
# records of the best accuracy seen and the epoch it occurred at.
# NOTE(review): 'tacc'/'tloss' presumably hold the test-split metrics and
# 'acc'/'loss' the training metrics — confirm against the loop body.
loss_history = []
acc_history = []
tacc_history = []
tloss_history = []
lr_list = []
best_acc = 0
best_epoch = 0

# In[ ]:

# Wall-clock start of the whole training run (per-epoch timing is taken
# separately at the top of each epoch below).
start_time = time.time()
for epoch in range(EPOCH):
    epoch_time = time.time()
Esempio n. 6
0
def main(args):
    """Train an NNUE network until the process is stopped.

    Builds data loaders and the model, then loops over epochs forever
    (`while True` — there is deliberately no stopping condition),
    validating and checkpointing both periodically within each epoch and
    at every epoch end, and stepping a plateau-based LR scheduler once
    per epoch.

    Args:
        args: parsed CLI namespace providing train/val paths and sizes,
            batch_size, use_factorizer, lambda_, and val_check_interval.
    """
    # Select which device to use
    if torch.cuda.is_available():
        main_device = 'cuda:0'
    else:
        main_device = 'cpu'

    # Create directories to store data and logs in
    output_path = prepare_output_directory()
    log_path = prepare_log_directory()

    # Print configuration info
    print(f'Device: {main_device}')
    print(f'Training set: {args.train}')
    print(f'Validation set: {args.val}')
    print(f'Batch size: {args.batch_size}')
    print(f'Using factorizer: {args.use_factorizer}')
    print(f'Lambda: {args.lambda_}')
    print(f'Validation check interval: {args.val_check_interval}')
    print(f'Logs written to: {log_path}')
    print(f'Data written to: {output_path}')
    print('')

    # Create log writer
    writer = SummaryWriter(log_path)

    # Create data loaders
    train_data_loader, val_data_loader = create_data_loaders(
        args.train, args.val, args.train_size, args.val_size, args.batch_size,
        args.use_factorizer, main_device)

    # Create model
    nnue = M.NNUE(args.use_factorizer,
                  feature_set=halfkp.Features()).to(main_device)

    # Configure optimizer
    optimizer = ranger.Ranger(nnue.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.1,
                                                           patience=1,
                                                           verbose=True,
                                                           min_lr=1e-6)

    # Main training loop
    num_batches = len(train_data_loader)
    epoch = 0
    running_train_loss = 0.0
    # BUG FIX: best_val_loss used to be re-initialized at the top of every
    # epoch, so the first validation check of each epoch was always flagged
    # as a "new best" and overwrote the best checkpoint. Track the best
    # validation loss across the whole run instead.
    best_val_loss = 1000000.0
    while True:
        for k, sample in enumerate(train_data_loader):
            train_loss = train_step(nnue, sample, optimizer, args.lambda_,
                                    epoch, k, num_batches)
            running_train_loss += train_loss.item()

            # Periodic mid-epoch validation, checkpoint, and logging.
            if k % args.val_check_interval == (args.val_check_interval - 1):
                val_loss = calculate_validation_loss(nnue, val_data_loader,
                                                     args.lambda_)
                new_best = False
                if (val_loss < best_val_loss):
                    new_best = True
                    best_val_loss = val_loss
                save_model(nnue, output_path, epoch, k, val_loss, new_best,
                           False)
                # epoch * num_batches + k is the global step index.
                writer.add_scalar('training loss',
                                  running_train_loss / args.val_check_interval,
                                  epoch * num_batches + k)
                writer.add_scalar('validation loss', val_loss,
                                  epoch * num_batches + k)
                running_train_loss = 0.0

        # End-of-epoch validation and checkpoint (marked as epoch-final).
        val_loss = calculate_validation_loss(nnue, val_data_loader,
                                             args.lambda_)
        new_best = False
        if (val_loss < best_val_loss):
            new_best = True
            best_val_loss = val_loss
        save_model(nnue, output_path, epoch, num_batches - 1, val_loss,
                   new_best, True)
        print('')

        # Reduce the LR when the end-of-epoch validation loss plateaus.
        scheduler.step(val_loss)
        epoch += 1
Esempio n. 7
0
 def configure_optimizers(self):
   """Return a Ranger optimizer over all model parameters, using the
   optimizer's default hyperparameters."""
   return ranger.Ranger(self.parameters())
Esempio n. 8
0
def main():
    """Parse CLI arguments and train an NNUE network with Ranger + SWA.

    Training runs for up to NUM_EPOCHS epochs with gradient clipping, a
    plateau-driven LR scheduler, and early stopping; after SWA_START
    epochs the run switches to stochastic weight averaging with its own
    LR schedule. Checkpoints and TensorBoard logs are written per
    experiment id, and the averaged model is evaluated at the end.
    """
    parser = argparse.ArgumentParser(description="Trains the network.")
    parser.add_argument("train", help="Training data (.bin or .binpack)")
    parser.add_argument("val", help="Validation data (.bin or .binpack)")

    # NOTE(review): --tune is parsed but never used in this function.
    parser.add_argument("--tune",
                        action="store_true",
                        help="automated LR search")
    parser.add_argument(
        "--save",
        action="store_true",
        help="save after every training epoch (default = False)")
    parser.add_argument("--experiment",
                        default="1",
                        type=str,
                        help="specify the experiment id")
    parser.add_argument("--py-data",
                        action="store_true",
                        help="Use python data loader (default=False)")
    parser.add_argument(
        "--lambda",
        default=1.0,
        type=float,
        dest='lambda_',
        help=
        "lambda=1.0 = train on evaluations, lambda=0.0 = train on game results, interpolates between (default=1.0)."
    )
    parser.add_argument(
        "--num-workers",
        default=1,
        type=int,
        dest='num_workers',
        help=
        "Number of worker threads to use for data loading. Currently only works well for binpack."
    )
    parser.add_argument(
        "--batch-size",
        default=-1,
        type=int,
        dest='batch_size',
        help=
        "Number of positions per batch / per iteration. Default on GPU = 8192 on CPU = 128."
    )
    parser.add_argument(
        "--threads",
        default=-1,
        type=int,
        dest='threads',
        help="Number of torch threads to use. Default automatic (cores) .")
    parser.add_argument("--seed",
                        default=42,
                        type=int,
                        dest='seed',
                        help="torch seed to use.")
    parser.add_argument(
        "--smart-fen-skipping",
        action='store_true',
        dest='smart_fen_skipping',
        help=
        "If enabled positions that are bad training targets will be skipped during loading. Default: False"
    )
    parser.add_argument(
        "--random-fen-skipping",
        default=0,
        type=int,
        dest='random_fen_skipping',
        help=
        "skip fens randomly on average random_fen_skipping before using one.")
    parser.add_argument(
        "--resume-from-model",
        dest='resume_from_model',
        help="Initializes training using the weights from the given .pt model")

    features.add_argparse_args(parser)
    args = parser.parse_args()

    print("Training with {} validating with {}".format(args.train, args.val))

    # Seed torch for reproducibility.
    # BUG FIX: the seed was hard-coded to 123, so the --seed option was
    # silently ignored.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    batch_size = args.batch_size
    if batch_size <= 0:
        # NOTE(review): args.gpus is not defined by the parser above —
        # presumably added by features.add_argparse_args; verify.
        batch_size = 128 if args.gpus == 0 else 8192
    print('Using batch size {}'.format(batch_size))

    print('Smart fen skipping: {}'.format(args.smart_fen_skipping))
    print('Random fen skipping: {}'.format(args.random_fen_skipping))

    if args.threads > 0:
        print('limiting torch to {} threads.'.format(args.threads))
        t_set_num_threads(args.threads)

    feature_set = features.get_feature_set_from_name(args.features)

    if args.py_data:
        print('Using python data loader')
        train_data, val_data = data_loader_py(args.train, args.val, batch_size,
                                              feature_set, 'cuda:0')

    else:
        print('Using c++ data loader')
        train_data, val_data = data_loader_cc(
            args.train, args.val, feature_set, args.num_workers, batch_size,
            args.smart_fen_skipping, args.random_fen_skipping, 'cuda:0')

    print("Feature set: {}".format(feature_set.name))
    print("Num real features: {}".format(feature_set.num_real_features))
    print("Num virtual features: {}".format(feature_set.num_virtual_features))
    print("Num features: {}".format(feature_set.num_features))

    START_EPOCH = 0
    NUM_EPOCHS = 150
    # Switch to stochastic weight averaging after 75% of the epochs.
    SWA_START = int(0.75 * NUM_EPOCHS)

    LEARNING_RATE = 5e-4
    DECAY = 0
    EPS = 1e-7

    best_loss = 1000
    is_best = False

    early_stopping_delay = 30
    early_stopping_count = 0
    early_stopping_flag = False

    summary_location = 'logs/nnue_experiment_' + args.experiment
    save_location = '/home/esigelec/PycharmProjects/nnue-pytorch/save_models/' + args.experiment

    writer = SummaryWriter(summary_location)

    nnue = M.NNUE(feature_set=feature_set, lambda_=args.lambda_, s=1)

    # Two parameter groups: the second trains at 10x the base LR.
    train_params = [{
        'params': nnue.get_1xlr(),
        'lr': LEARNING_RATE
    }, {
        'params': nnue.get_10xlr(),
        'lr': LEARNING_RATE * 10.0
    }]

    optimizer = ranger.Ranger(train_params,
                              lr=LEARNING_RATE,
                              eps=EPS,
                              betas=(0.9, 0.999),
                              weight_decay=DECAY)

    if args.resume_from_model is not None:
        nnue, optimizer, START_EPOCH = load_ckp(args.resume_from_model, nnue,
                                                optimizer)
        nnue.set_feature_set(feature_set)
        # Move any optimizer state tensors restored on CPU onto the GPU.
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.1,
                                                           patience=7,
                                                           cooldown=1,
                                                           min_lr=1e-7,
                                                           verbose=True)
    swa_scheduler = SWALR(optimizer, annealing_epochs=5, swa_lr=[5e-5, 1e-4])

    nnue = nnue.cuda()
    swa_nnue = AveragedModel(nnue)

    for epoch in range(START_EPOCH, NUM_EPOCHS):

        nnue.train()

        train_interval = 100
        loss_f_sum_interval = 0.0
        loss_f_sum_epoch = 0.0
        loss_v_sum_epoch = 0.0

        if early_stopping_flag:
            print("early end of training at epoch" + str(epoch))
            break

        for batch_idx, batch in enumerate(train_data):

            batch = [_data.cuda() for _data in batch]
            us, them, white, black, outcome, score = batch

            optimizer.zero_grad()
            output = nnue(us, them, white, black)

            loss = nnue_loss(output, outcome, score, args.lambda_)

            loss.backward()
            # Clip gradients to stabilize training.
            torch.nn.utils.clip_grad_norm_(nnue.parameters(), 0.5)
            optimizer.step()

            loss_f_sum_interval += loss.float()
            loss_f_sum_epoch += loss.float()

            # Log the running training loss every train_interval batches.
            if batch_idx % train_interval == train_interval - 1:

                writer.add_scalar('train_loss',
                                  loss_f_sum_interval / train_interval,
                                  epoch * len(train_data) + batch_idx)

                loss_f_sum_interval = 0.0

        print("Epoch #{}\t Train_Loss: {:.8f}\t".format(
            epoch, loss_f_sum_epoch / len(train_data)))

        # Validate every epoch (the modulus is 1; raise it to validate
        # less often).
        if epoch % 1 == 0 or (epoch + 1) == NUM_EPOCHS:

            with torch.no_grad():
                nnue.eval()
                for batch_idx, batch in enumerate(val_data):
                    batch = [_data.cuda() for _data in batch]
                    us, them, white, black, outcome, score = batch

                    _output = nnue(us, them, white, black)
                    loss_v = nnue_loss(_output, outcome, score, args.lambda_)
                    loss_v_sum_epoch += loss_v.float()

            if epoch > SWA_START:
                # SWA phase: average weights into swa_nnue and follow the
                # SWA LR schedule; no plateau stepping or early stopping.
                print("swa_mode")
                swa_nnue.update_parameters(nnue)
                swa_scheduler.step()
                checkpoint = {
                    'epoch': epoch + 1,
                    'state_dict': swa_nnue.state_dict(),
                    'optimizer': optimizer.state_dict()
                }
                save_ckp(checkpoint, save_location, 'swa_nnue.pt')

            else:

                scheduler.step(loss_v_sum_epoch / len(val_data))

                # Track the best validation loss for checkpointing and
                # early stopping.
                if loss_v_sum_epoch / len(val_data) <= best_loss:
                    best_loss = loss_v_sum_epoch / len(val_data)
                    is_best = True
                    early_stopping_count = 0
                else:
                    early_stopping_count += 1
                if early_stopping_delay == early_stopping_count:
                    early_stopping_flag = True

                if is_best:
                    checkpoint = {
                        'epoch': epoch + 1,
                        'state_dict': nnue.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }
                    save_ckp(checkpoint, save_location)
                    is_best = False

            writer.add_scalar('val_loss', loss_v_sum_epoch / len(val_data),
                              epoch * len(train_data) + batch_idx)

            print("Epoch #{}\tVal_Loss: {:.8f}\t".format(
                epoch, loss_v_sum_epoch / len(val_data)))

    # Final evaluation of the weight-averaged model on the validation set.
    loss_v_sum_epoch = 0.0

    with torch.no_grad():
        swa_nnue.eval()
        for batch_idx, batch in enumerate(val_data):
            batch = [_data.cuda() for _data in batch]
            us, them, white, black, outcome, score = batch

            _output = swa_nnue(us, them, white, black)
            loss_v = nnue_loss(_output, outcome, score, args.lambda_)
            loss_v_sum_epoch += loss_v.float()

    print("Val_Loss: {:.8f}\t".format(loss_v_sum_epoch / len(val_data)))

    writer.close()
Esempio n. 9
0
	def handler(self,pickedClass):
		"""Instantiate the picked D&D character class and show its summary.

		Stores a freshly constructed class object (with its hard-coded
		starting skills, equipment, and spells) on
		``self.tab_window.main_window.classes`` and writes its display
		title plus string form to ``self.label``.

		NOTE(review): ``self.classSelected`` is set True even when
		``pickedClass`` matches no known class — confirm that is intended.
		"""
		# Map each class keyword to its display title and a lazy factory,
		# so only the picked class is ever instantiated.
		builders = {
			'barbarian': ('Barbarian', lambda: barbarian.Barbarian(['Nature','Perception'],['Great-Axe','Two Handaxe','Explorer Pack with 4 Javelins'])),
			'bard': ('Bard', lambda: bard.Bard(['Persuasion', 'Stealth','Nature'],['Rapier','Diplomat Pack'],['Flute'],['Dancing Lights','Viscous Mockery'],['Charm Person','Detect Magic','Healing Word','Thunderwave'])),
			'cleric': ('Cleric', lambda: cleric.Cleric(['Persuasion','Religion'],['Mace','Scale Mail','Light Crossbow','Priest Pack','Shield'],['Guidance','Light','Mending'],['Command','Identify'],'Light Domain')),
			'druid': ('Druid', lambda: druid.Druid(["Animal Handling","Survival"],['Wooden Shield','Scimitar','Leather Armor','Explorers Pack','Druidic Focus'],['Posion Spray','Frostbite'],["Calm Animal","Charm Animal"])),
			'fighter': ('Fighter', lambda: fighter.Fighter(['Intimidation','Athletics'],['Chain Mail','Martial Weapon and Shield','Light Crossbow and 20 bolts','Dungeoneers pack'],'Dueling')),
			'monk': ('Monk', lambda: monk.Monk(['Acrobatics','Stealth'],['Shortsword','Dungeoneer pack','10 Darts'],'Guitar')),
			'paladin': ('Paladin', lambda: paladin.Paladin(['Medicine','Religion'],['Martial weapon and Shield','5 Javelins','Priest Pack','Chain Mail','Holy Symbol'])),
			'ranger': ('Ranger', lambda: ranger.Ranger(['Animal Handling','Nature'],['Scale Mail','Two shortswords','Dungeoneer pack','Longbow 20 arrows'])),
			'rogue': ('Rogue', lambda: rogue.Rogue(['Stealth','Sleight of Hand','Investigation','Athletics'],['Rapier','Shortbow with 20 arrows','Burglars pack','Leather Armor','Two daggers',"Thieve's tools"],'Thieve\'s Tools','Stealth')),
			'sorcerer': ('Sorceror', lambda: sorcerer.Sorcerer(['Arcana','Insight'],['Light crossbow 20 bolts','Component pouch','Dungeoneers pack','Two Daggers'],['Light','Prestidigitation','Ray of frost','Shocking grasp'],['Shield','Magic missile'],'Bloodline')),
			'warlock': ('Warlock', lambda: warlock.Warlock(["Arcana","Religion"],['Light crossbow 20 bolts','Component pouch','Scholars pack','Leather Armor','Shortsword','Two daggers'],['Eldrich blast','Chill touch'],['Ray of sickness','Witch bolt'],'The Fiend')),
			'wizard': ('Wizard', lambda: wizard.Wizard(['History','Insight'],['Quarterstaff','Component pouch','Scholar pack','Spellbook'],['Mage hand','Light','Ray of Frost'],['Burning hands','Charm person','Feather fall','Mage armor','Missile','Sleep'])),
		}
		entry = builders.get(pickedClass)
		if entry is not None:
			title, build = entry
			chosen = build()
			self.tab_window.main_window.classes = chosen
			self.label.setText("{}\n{}".format(title, chosen.__str__()))
		self.classSelected = True