# define model
# Build the Music Transformer from the hyper-parameters held in `config`.
mt = MusicTransformer(
    embedding_dim=config.embedding_dim,
    vocab_size=config.vocab_size,
    num_layer=config.num_layers,
    max_seq=config.max_seq,
    dropout=config.dropout,
    debug=config.debug,
    loader_path=config.load_path,
)

# Optionally warm-start from an existing checkpoint.
if config.init_ckpt is not None:
    # map_location remaps the stored tensors onto config.device, so a
    # checkpoint written on a device that is absent on this host (e.g. a GPU
    # checkpoint opened on a CPU-only machine) can still be restored.
    # NOTE(review): assumes config.device is a torch.device or device string
    # (it is also what is passed to mt.to() below) - confirm.
    state_dict = torch.load(config.init_ckpt, map_location=config.device)
    mt.load_state_dict(state_dict)
    print("Weights from %s loaded" % config.init_ckpt)
mt.to(config.device)

# The optimizer is created with lr=0: the rate is set inside the scheduler.
opt = optim.Adam(mt.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)
# custom implementation of rate decay
scheduler = CustomSchedule(config.embedding_dim, optimizer=opt)

# multi-GPU set
# single_mt always refers to the bare model; mt is replaced by a DataParallel
# wrapper only when more than one CUDA device is visible, with outputs
# gathered on the last device.
single_mt = mt
n_gpus = torch.cuda.device_count()
if n_gpus > 1:
    mt = torch.nn.DataParallel(mt, output_device=n_gpus - 1)

# init metric set
# NOTE(review): this fragment is cut off inside the statement below (the dict
# literal and the MetricsSet call are never closed), so it is preserved
# verbatim as a comment instead of being completed by guesswork.
# metric_set = MetricsSet({
#     'accuracy': CategoricalAccuracy(),
#     'loss': SmoothCrossEntropyLoss(config.label_smooth, config.vocab_size, config.pad_token),
print(dataset)

# load model
learning_rate = config.l_r

# define model
# Build the Music Transformer from the hyper-parameters held in `config`,
# then move it to the configured device.
mt = MusicTransformer(
    embedding_dim=config.embedding_dim,
    vocab_size=config.vocab_size,
    num_layer=config.num_layers,
    max_seq=config.max_seq,
    dropout=config.dropout,
    debug=config.debug,
    loader_path=config.load_path,
)
mt.to(config.device)

# Adam is created with lr=0; the optimizer is then handed to CustomSchedule.
opt = optim.Adam(mt.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)
scheduler = CustomSchedule(config.embedding_dim, optimizer=opt)

# multi-GPU set
# single_mt always refers to the bare model; mt is replaced by a DataParallel
# wrapper only when more than one CUDA device is visible, with outputs
# gathered on the last device.
single_mt = mt
n_gpus = torch.cuda.device_count()
if n_gpus > 1:
    mt = torch.nn.DataParallel(mt, output_device=n_gpus - 1)

# init metric set
# NOTE(review): this fragment is cut off inside the statement below (the dict
# literal and the MetricsSet call are never closed), so it is preserved
# verbatim as a comment instead of being completed by guesswork.
# metric_set = MetricsSet({
#     'accuracy': CategoricalAccuracy(),
#     'loss': SmoothCrossEntropyLoss(config.label_smooth, config.vocab_size, config.pad_token),
# load model
learning_rate = config.l_r

# define model
# Build the Music Transformer from the hyper-parameters held in `config`,
# then move it to the configured device.
mt = MusicTransformer(
    embedding_dim=config.embedding_dim,
    vocab_size=config.vocab_size,
    num_layer=config.num_layers,
    max_seq=config.max_seq,
    dropout=config.dropout,
    debug=config.debug,
    loader_path=config.load_path,
)
mt.to(config.device)

# Adam is created with lr=0; the optimizer is then handed to CustomSchedule.
opt = optim.Adam(mt.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)
scheduler = CustomSchedule(config.embedding_dim, optimizer=opt)

# multi-GPU set
# single_mt always refers to the bare model; mt is replaced by a DataParallel
# wrapper only when more than one CUDA device is visible, with outputs
# gathered on the last device.
single_mt = mt
n_gpus = torch.cuda.device_count()
if n_gpus > 1:
    mt = torch.nn.DataParallel(mt, output_device=n_gpus - 1)

# init metric set
# Metric objects are registered under the keys 'accuracy', 'loss' and 'bucket'.
metric_set = MetricsSet({
    'accuracy': CategoricalAccuracy(),
    'loss': SmoothCrossEntropyLoss(config.label_smooth, config.vocab_size, config.pad_token),
    'bucket': LogitsBucketting(config.vocab_size),
})