def test_toy_agent_gs():
    """GS wrapper: hard one-hots in eval mode, soft samples in train mode, and trainable."""
    core.init()
    wrapped = core.GumbelSoftmaxWrapper(ToyAgent())

    # Eval mode produces hard one-hot samples: exactly one positive entry per row.
    wrapped.eval()
    sample = wrapped(BATCH_X)
    assert sample.size() == torch.Size((8, 2))
    assert (sample > 0).sum() == 8

    # Train mode with a high temperature yields soft samples: all entries positive.
    wrapped.train()
    wrapped.temperature = 10.0
    sample = wrapped(BATCH_X)
    assert sample.size() == torch.Size((8, 2))
    assert (sample > 0).sum() == 16

    # The relaxation is differentiable: training recovers the target labels.
    wrapped.temperature = 0.5
    opt = torch.optim.Adam(wrapped.parameters())
    for _ in range(1000):
        opt.zero_grad()
        logits = wrapped(BATCH_X)
        F.cross_entropy(logits, BATCH_Y).backward()
        opt.step()
    assert (wrapped.agent.fc1.weight.t().argmax(dim=1) == BATCH_Y).all()
def test_snapshoting():
    """CheckpointSaver writes per-epoch and final checkpoints; Trainer can resume from the latest."""
    CHECKPOINT_PATH = Path('./test_checkpoints')
    core.init()
    sender = core.GumbelSoftmaxWrapper(ToyAgent(), temperature=1)
    receiver = Receiver()

    # Named function instead of an assigned lambda (PEP 8 / E731).
    def loss(sender_input, message, receiver_input, receiver_output, labels):
        return F.cross_entropy(receiver_output, labels), {}

    game = core.SymbolGameGS(sender, receiver, loss)
    optimizer = torch.optim.Adam(game.parameters())
    data = Dataset()
    trainer = core.Trainer(
        game, optimizer, train_data=data,
        validation_data=None,
        callbacks=[core.CheckpointSaver(checkpoint_path=CHECKPOINT_PATH)])
    trainer.train(2)

    assert (CHECKPOINT_PATH / Path('1.tar')).exists()
    assert (CHECKPOINT_PATH / Path('2.tar')).exists()
    assert (CHECKPOINT_PATH / Path('final.tar')).exists()

    del trainer

    # Re-instantiate the trainer and restore state from the newest checkpoint.
    trainer = core.Trainer(game, optimizer, train_data=data)
    trainer.load_from_latest(CHECKPOINT_PATH)
    assert trainer.start_epoch == 2
    trainer.train(3)

    shutil.rmtree(CHECKPOINT_PATH)  # Clean-up
def test_game_reinforce():
    """End-to-end REINFORCE symbol game: training recovers the target mapping."""
    core.init()
    sender = core.ReinforceWrapper(ToyAgent())
    receiver = core.ReinforceDeterministicWrapper(Receiver())

    # Named function instead of an assigned lambda (PEP 8 / E731).
    # Loss is the negated 0/1 reward: -1 for each correct prediction.
    def loss(sender_input, message, receiver_input, receiver_output, labels, aux_input):
        return -(receiver_output == labels).float(), {}

    game = core.SymbolGameReinforce(sender, receiver, loss,
                                    sender_entropy_coeff=1e-1,
                                    receiver_entropy_coeff=0.0)
    optimizer = torch.optim.Adagrad(game.parameters(), lr=1e-1)
    data = Dataset()
    trainer = core.Trainer(game, optimizer, train_data=data, validation_data=None)
    trainer.train(5000)

    assert (sender.agent.fc1.weight.t().argmax(dim=1).cpu() == BATCH_Y).all(), \
        str(sender.agent.fc1.weight)
def test_snapshoting():
    """CheckpointSaver produces per-epoch and final checkpoint files."""
    CHECKPOINT_PATH = Path("./test_checkpoints")
    core.init()
    sender = core.GumbelSoftmaxWrapper(ToyAgent(), temperature=1)
    receiver = Receiver()

    # Named function instead of an assigned lambda (PEP 8 / E731).
    def loss(sender_input, message, receiver_input, receiver_output, labels, aux_input):
        return F.cross_entropy(receiver_output, labels), {}

    game = core.SymbolGameGS(sender, receiver, loss)
    optimizer = torch.optim.Adam(game.parameters())
    data = Dataset()
    trainer = core.Trainer(
        game,
        optimizer,
        train_data=data,
        validation_data=None,
        callbacks=[core.CheckpointSaver(checkpoint_path=CHECKPOINT_PATH)],
    )
    trainer.train(2)

    assert (CHECKPOINT_PATH / Path("1.tar")).exists()
    assert (CHECKPOINT_PATH / Path("2.tar")).exists()
    assert (CHECKPOINT_PATH / Path("final.tar")).exists()

    # Clean-up (dropped a stray trailing triple-quote left in the old comment)
    shutil.rmtree(CHECKPOINT_PATH)
def test_game_gs():
    """End-to-end Gumbel-Softmax symbol game: training recovers the target mapping."""
    core.init()
    sender = core.GumbelSoftmaxWrapper(ToyAgent())
    receiver = Receiver()

    # Named function instead of an assigned lambda (PEP 8 / E731).
    def loss(sender_input, message, receiver_input, receiver_output, labels):
        return F.cross_entropy(receiver_output, labels), {}

    game = core.SymbolGameGS(sender, receiver, loss)
    optimizer = torch.optim.Adam(game.parameters())
    data = Dataset()
    trainer = core.Trainer(game, optimizer, train_data=data, validation_data=None)
    trainer.train(1000)

    assert (sender.agent.fc1.weight.t().argmax(dim=1).cpu() == BATCH_Y).all()
def test_toy_agent_reinforce():
    """A REINFORCE-wrapped agent learns the input-to-label mapping from rewards."""
    core.init()
    wrapped = core.ReinforceWrapper(ToyAgent())
    opt = torch.optim.Adam(wrapped.parameters())

    for _ in range(1000):
        opt.zero_grad()
        sample, log_prob, entropy = wrapped(BATCH_X)
        # Policy-gradient loss: reward-weighted negative log-likelihood.
        reward = (sample == BATCH_Y).float()
        policy_loss = -(reward * log_prob).mean()
        policy_loss.backward()
        opt.step()

    assert (wrapped.agent.fc1.weight.t().argmax(dim=1).cpu() == BATCH_Y).all()
def test_toy_counting_gradient():
    """Gradients through ToyGame drive the agent's weights towards all-ones."""
    core.init()
    agent = ToyAgent()
    game = ToyGame(agent)
    optimizer = core.build_optimizer(agent.parameters())
    dataset = ToyDataset()

    core.Trainer(game, optimizer, train_data=dataset, validation_data=None).train(10000)

    expected = torch.ones_like(agent.fc1.weight)
    assert torch.allclose(agent.fc1.weight, expected, rtol=0.05), agent.fc1.weight
def parse_arguments():
    """Build the CLI parser for the game and return the parsed options."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--root', default='', help='data root folder')

    # 2-agents specific parameters
    parser.add_argument('--tau_s', type=float, default=10.0,
                        help='Sender Gibbs temperature')
    parser.add_argument('--game_size', type=int, default=2,
                        help='Number of images seen by an agent')
    parser.add_argument('--same', type=int, default=0, help='Use same concepts')
    parser.add_argument('--vocab_size', type=int, default=100,
                        help='Vocabulary size')
    parser.add_argument('--embedding_size', type=int, default=50,
                        help='embedding size')
    parser.add_argument('--hidden_size', type=int, default=20,
                        help='hidden size (number of filters informed sender)')
    parser.add_argument('--batches_per_epoch', type=int, default=100,
                        help='Batches in a single training/validation epoch')
    parser.add_argument('--inf_rec', type=int, default=0,
                        help='Use informed receiver')
    parser.add_argument('--mode', type=str, default='rf',
                        help='Training mode: Gumbel-Softmax (gs) or Reinforce (rf). Default: rf.')
    parser.add_argument('--gs_tau', type=float, default=1.0,
                        help='GS temperature')

    options = core.init(parser)
    # At least one image must be shown to each agent.
    assert options.game_size >= 1
    return options
def get_params(params):
    """Parse command-line options; echoes the raw params for logging."""
    print(params)
    parser = argparse.ArgumentParser()
    parser.add_argument('--receiver_layers', type=int, default=-1)
    parser.add_argument('--cell_layers', type=int, default=1)
    parser.add_argument('--receiver_hidden', type=int, default=10,
                        help='Size of the hidden layer of Receiver (default: 10)')
    parser.add_argument('--receiver_cell', type=str, default='rnn')
    parser.add_argument('--receiver_emb', type=int, default=10,
                        help='Size of the embeddings of Receiver (default: 10)')
    parser.add_argument('--n_a', type=int, default=2)
    parser.add_argument('--n_v', type=int, default=10)
    parser.add_argument('--language', type=str, choices=['identity', 'rotated'])
    parser.add_argument('--loss_type', choices=['autoenc', 'mixed', 'linear'],
                        default='autoenc')
    return core.init(arg_parser=parser, params=params)
def get_params():
    """Build and parse command-line options for the game.

    Help strings have been corrected to match the actual defaults (several
    claimed "default: 10" or "default: 1e-3" while the real default differed),
    and a missing closing parenthesis was added to the n_attributes help.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_features', type=int, default=5,
                        help='Dimensionality of the "concept" space (default: 5)')
    parser.add_argument('--n_attributes', type=int, default=2,
                        help='Number of attributes (default: 2)')
    parser.add_argument('--sender_hidden', type=int, default=200,
                        help='Size of the hidden layer of Sender (default: 200)')
    parser.add_argument('--receiver_hidden', type=int, default=200,
                        help='Size of the hidden layer of Receiver (default: 200)')
    parser.add_argument('--sender_embedding', type=int, default=50,
                        help='Dimensionality of the embedding hidden layer for Sender (default: 50)')
    parser.add_argument('--receiver_embedding', type=int, default=50,
                        help='Dimensionality of the embedding hidden layer for Receiver (default: 50)')
    parser.add_argument('--rnn_cell', type=str, default='rnn')
    parser.add_argument('--pretraining_sender_lr', type=float, default=1e-3,
                        help="Learning rate for Sender's parameters (default: 1e-3)")
    parser.add_argument('--pretraining_receiver_lr', type=float, default=1e-3,
                        help="Learning rate for Receiver's parameters (default: 1e-3)")
    parser.add_argument('--sender_lr', type=float, default=1e-3,
                        help="Learning rate for Sender's parameters (default: 1e-3)")
    parser.add_argument('--receiver_lr', type=float, default=1e-5,
                        help="Learning rate for Receiver's parameters (default: 1e-5)")
    parser.add_argument('--seed', type=int, default=171, help="Random seed")
    parser.add_argument('--pretrain', action='store_true', default=False, help="")
    # NOTE(review): store_true with default=True makes this flag a no-op —
    # confirm whether store_false or default=False was intended. Behavior kept.
    parser.add_argument('--padding', action='store_true', default=True, help="")
    parser.add_argument('--config', type=str, default=None)

    args = core.init(parser)
    print(args)
    return args
def get_params(params):
    """Parse command-line options; echoes the raw params for logging."""
    print(params)
    parser = argparse.ArgumentParser()
    parser.add_argument('--receiver_layers', type=int, default=-1)
    parser.add_argument('--n_points', type=int, default=1000)
    parser.add_argument('--cell_layers', type=int, default=1)
    parser.add_argument('--receiver_hidden', type=int, default=10,
                        help='Size of the hidden layer of Receiver (default: 10)')
    parser.add_argument('--receiver_cell', type=str, default='lstm',
                        choices=["lstm", "tree"])
    parser.add_argument('--receiver_emb', type=int, default=10,
                        help='Size of the embeddings of Receiver (default: 10)')
    parser.add_argument('--lenses', type=int, default=0)
    return core.init(arg_parser=parser, params=params)
def main(params):
    """Train the MNIST VAE game and dump reconstruction images each epoch."""
    opts = core.init(params=params)

    loader_kwargs = {'num_workers': 1, 'pin_memory': True} if opts.cuda else {}
    transform = transforms.ToTensor()
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True, transform=transform),
        batch_size=opts.batch_size, shuffle=True, **loader_kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=transform),
        batch_size=opts.batch_size, shuffle=True, **loader_kwargs)

    sender = Sender(opts.vocab_size)
    receiver = Receiver(opts.vocab_size)
    game = VAE_Game(sender, receiver)
    optimizer = core.build_optimizer(game.parameters())

    # initialize and launch the trainer
    callbacks = [
        core.ConsoleLogger(as_json=True, print_train_loss=True),
        ImageDumpCallback(test_loader.dataset),
    ]
    trainer = core.Trainer(game=game, optimizer=optimizer,
                           train_data=train_loader,
                           validation_data=test_loader,
                           callbacks=callbacks)
    trainer.train(n_epochs=opts.n_epochs)
    core.close()
def get_params(params):
    """Parse command-line options for the GS channel game."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--temperature', type=float, default=1.0,
                        help="GS temperature for the sender (default: 1)")
    parser.add_argument('--early_stopping_thr', type=float, default=1.0,
                        help="Early stopping threshold on accuracy (default: 1.0)")
    parser.add_argument('--deeper', type=int, default=0,
                        help="Addition FC layer")
    parser.add_argument('--deeper_alice', type=int, default=1,
                        help="Addition FC layer goes to Alice")
    parser.add_argument('--p_corrupt', type=float, default=0,
                        help="Probability of corrupting a label (default: 0.0)")
    parser.add_argument('--softmax_non_linearity', type=int, default=0,
                        help="Disable GS training, treat channel as softmax non-linearity (default: 0)")
    parser.add_argument('--linear_channel', type=int, default=0,
                        help="Disable GS training, treat channel as a linear connection (default: 0)")

    opts = core.init(parser, params)
    # The corruption rate must be a valid probability.
    assert 0.0 <= opts.p_corrupt <= 1.0
    return opts
def get_params(params):
    """Parse command-line options; echoes the raw params for logging."""
    print(params)
    parser = argparse.ArgumentParser()
    parser.add_argument("--receiver_layers", type=int, default=-1)
    parser.add_argument("--cell_layers", type=int, default=1)
    parser.add_argument("--receiver_hidden", type=int, default=10,
                        help="Size of the hidden layer of Receiver (default: 10)")
    parser.add_argument("--receiver_cell", type=str, default="rnn")
    parser.add_argument("--receiver_emb", type=int, default=10,
                        help="Size of the embeddings of Receiver (default: 10)")
    parser.add_argument("--n_a", type=int, default=2)
    parser.add_argument("--n_v", type=int, default=10)
    parser.add_argument("--language", type=str, choices=["identity", "rotated"])
    parser.add_argument("--loss_type", default="autoenc",
                        choices=["autoenc", "mixed", "linear"])
    return core.init(arg_parser=parser, params=params)
def get_params() -> argparse.Namespace:
    """Build and parse command-line options.

    Fixed the return annotation: the function returns the parsed options
    (a Namespace produced by core.init), not the ArgumentParser itself.
    Also corrected help strings that contradicted the actual defaults and a
    missing closing parenthesis in the n_attributes help.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_features', type=int, default=5,
                        help='Dimensionality of the "concept" space (default: 5)')
    parser.add_argument('--n_attributes', type=int, default=2,
                        help='Number of attributes (default: 2)')
    parser.add_argument('--seed', type=int, default=171, help="Random seed")
    parser.add_argument('--neptune_project', type=str, default=None)
    parser.add_argument('--config', type=str, default=None)

    # Agent architecture
    parser.add_argument('--sender_hidden', type=int, default=200,
                        help='Size of the hidden layer of Sender (default: 200)')
    parser.add_argument('--receiver_hidden', type=int, default=200,
                        help='Size of the hidden layer of Receiver (default: 200)')
    parser.add_argument('--sender_embedding', type=int, default=50,
                        help='Dimensionality of the embedding hidden layer for Sender (default: 50)')
    parser.add_argument('--receiver_embedding', type=int, default=50,
                        help='Dimensionality of the embedding hidden layer for Receiver (default: 50)')
    parser.add_argument('--rnn_cell', type=str, default='rnn')

    args = core.init(parser)
    print(args)
    return args
def get_params(params):
    """Parse command-line options and validate mutually exclusive channel modes."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--temperature', type=float, default=1.0,
                        help="GS temperature for the sender (default: 1)")
    parser.add_argument('--early_stopping_thr', type=float, default=1.0,
                        help="Early stopping threshold on accuracy (default: 1.0)")
    parser.add_argument('--softmax_non_linearity', type=int, default=0,
                        help="Disable GS training, treat channel as softmax non-linearity (default: 0)")
    parser.add_argument('--linear_channel', type=int, default=0,
                        help="Disable GS training, treat channel as a linear connection (default: 0)")

    opts = core.init(parser, params)
    # The two channel-replacement modes cannot be enabled at the same time.
    assert not (opts.softmax_non_linearity == 1 and opts.linear_channel == 1)
    return opts
def get_params(params):
    """Parse command-line options for the compositionality game.

    Fixed the "defautl" typo and help strings that claimed "default: 10"
    while the actual hidden-size defaults are 50.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_attributes', type=int, default=4, help='')
    parser.add_argument('--n_values', type=int, default=4, help='')
    parser.add_argument('--data_scaler', type=int, default=100)
    parser.add_argument('--stats_freq', type=int, default=0)
    parser.add_argument('--baseline', type=str,
                        choices=['no', 'mean', 'builtin'], default='mean')
    parser.add_argument('--density_data', type=int, default=0,
                        help='no sampling if equal 0')
    parser.add_argument('--sender_hidden', type=int, default=50,
                        help='Size of the hidden layer of Sender (default: 50)')
    parser.add_argument('--receiver_hidden', type=int, default=50,
                        help='Size of the hidden layer of Receiver (default: 50)')
    parser.add_argument('--sender_entropy_coeff', type=float, default=1e-2,
                        help="Entropy regularisation coeff for Sender (default: 1e-2)")
    parser.add_argument('--sender_cell', type=str, default='rnn')
    parser.add_argument('--receiver_cell', type=str, default='rnn')
    parser.add_argument('--sender_emb', type=int, default=10,
                        help='Size of the embeddings of Sender (default: 10)')
    parser.add_argument('--receiver_emb', type=int, default=10,
                        help='Size of the embeddings of Receiver (default: 10)')
    parser.add_argument('--early_stopping_thr', type=float, default=0.99999,
                        help="Early stopping threshold on accuracy (default: 0.99999)")
    args = core.init(arg_parser=parser, params=params)
    return args
def get_common_opts(params):
    """Assemble the full option set from the per-topic helpers and parse it."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--weight_decay", type=float, default=10e-6,
                        help="Weight decay used for SGD")
    parser.add_argument("--use_larc", action="store_true", default=False,
                        help="Use LARC optimizer")
    parser.add_argument("--pdb", action="store_true", default=False,
                        help="Run the game with pdb enabled")

    # Delegate topic-specific flags to their dedicated helpers.
    for add_opts in (get_data_opts, get_gs_opts,
                     get_vision_module_opts, get_game_arch_opts):
        add_opts(parser)

    return core.init(arg_parser=parser, params=params)
def main(params):
    """Train the beta-VAE game on dSprites, downloading the dataset if needed."""
    opts = core.init(params=params)

    root = os.path.join('data', 'dsprites-dataset',
                        'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
    if not os.path.exists(root):
        import subprocess
        print('Now download dsprites-dataset')
        subprocess.call([os.path.join('egg', 'zoo', 'dsprites_bvae',
                                      'data_loaders', 'download_dsprites.sh')])
        print('Finished')

    train_loader, test_loader = get_dsprites_dataloader(
        path_to_data=root, batch_size=opts.batch_size, image=True)
    image_shape = (64, 64)

    sender = VisualSender()
    receiver = VisualReceiver()
    game = betaVAE_Game(sender, receiver)
    optimizer = core.build_optimizer(game.parameters())

    # initialize and launch the trainer
    callbacks = [
        core.ConsoleLogger(as_json=True, print_train_loss=True),
        ImageDumpCallback(test_loader.dataset, image_shape=image_shape),
        TopographicSimilarity(sender_input_distance_fn='euclidean',
                              message_distance_fn='euclidean',
                              is_gumbel=False),
        PosDisent(),
    ]
    trainer = core.Trainer(game=game, optimizer=optimizer,
                           train_data=train_loader,
                           validation_data=test_loader,
                           callbacks=callbacks)
    trainer.train(n_epochs=opts.n_epochs)
    core.close()
def get_params():
    """Parse command-line options for the a^n b^n game."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--batches_per_epoch', type=int, default=1000,
                        help='Number of batches per epoch (default: 1000)')
    parser.add_argument('--sender_hidden', type=int, default=10,
                        help='Size of the hidden layer of Sender (default: 10)')
    parser.add_argument('--receiver_hidden', type=int, default=10,
                        help='Size of the hidden layer of Receiver (default: 10)')
    parser.add_argument('--sender_embedding', type=int, default=10,
                        help='Dimensionality of the embedding hidden layer for Sender (default: 10)')
    parser.add_argument('--receiver_embedding', type=int, default=10,
                        help='Dimensionality of the embedding hidden layer for Receiver (default: 10)')
    parser.add_argument('--sender_cell', type=str, default='rnn',
                        help='Type of the cell used for Sender {rnn, gru, lstm} (default: rnn)')
    parser.add_argument('--receiver_cell', type=str, default='rnn',
                        help='Type of the cell used for Receiver {rnn, gru, lstm} (default: rnn)')
    parser.add_argument('--temperature', type=float, default=1.0,
                        help="GS temperature for the sender (default: 1.0)")
    parser.add_argument('--max_n', type=int, default=10,
                        help="Max n in a^nb^n(default: 10)")
    return core.init(parser)
def get_params(params):
    """Parse command-line options for the partially-revealed MNIST game.

    Fixed the "defautl" typo in the early-stopping help string.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--temperature",
        type=float,
        default=1.0,
        help="GS temperature for the sender (default: 1)",
    )
    parser.add_argument(
        "--sender_rows",
        type=int,
        default=28,
        help="Number of image rows revealed to Sender (default: 28)",
    )
    parser.add_argument(
        "--early_stopping_thr",
        type=float,
        default=0.98,
        help="Early stopping threshold on accuracy (default: 0.98)",
    )
    parser.add_argument("--n_labels", type=int, default=10)
    parser.add_argument("--n_hidden", type=int, default=0)
    args = core.init(parser, params=params)
    return args
def test_symbol_wrapper():
    """SymbolReceiverWrapper treats id-encoded and one-hot messages identically."""
    core.init()
    receiver = core.SymbolReceiverWrapper(Receiver(), vocab_size=15, agent_input_size=5)

    # when trained with REINFORCE, the message would be encoded as long ids
    ids = torch.randint(high=15, size=(16,)).long()
    out_ids = receiver(ids)
    assert out_ids.size() == torch.Size((16, 5))

    # when trained with Gumbel-Softmax, the same message is one-hot-encoded
    one_hot = torch.zeros((16, 15))
    one_hot.scatter_(1, ids.unsqueeze(1), 1.0)
    out_one_hot = receiver(one_hot)

    # both encodings must produce the same receiver output
    assert out_ids.eq(out_one_hot).all().item() == 1
def get_params():
    """Build and parse command-line options.

    Fixes: --pretrain used ``type=bool``, a classic argparse pitfall —
    ``bool('False')`` is True, so any value supplied on the command line
    enabled the flag. Replaced with ``action='store_true'`` (same False
    default, intended on/off semantics, consistent with sibling scripts).
    Help strings corrected to match the actual defaults.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_features', type=int, default=5,
                        help='Dimensionality of the "concept" space (default: 5)')
    parser.add_argument('--batches_per_epoch', type=int, default=1000,
                        help='Number of batches per epoch (default: 1000)')
    parser.add_argument('--sender_hidden', type=int, default=200,
                        help='Size of the hidden layer of Sender (default: 200)')
    parser.add_argument('--receiver_hidden', type=int, default=200,
                        help='Size of the hidden layer of Receiver (default: 200)')
    parser.add_argument('--sender_embedding', type=int, default=50,
                        help='Dimensionality of the embedding hidden layer for Sender (default: 50)')
    parser.add_argument('--receiver_embedding', type=int, default=50,
                        help='Dimensionality of the embedding hidden layer for Receiver (default: 50)')
    parser.add_argument('--rnn_cell', type=str, default='rnn')
    parser.add_argument('--sender_lr', type=float, default=0.0005,
                        help="Learning rate for Sender's parameters (default: 0.0005)")
    parser.add_argument('--receiver_lr', type=float, default=0.0005,
                        help="Learning rate for Receiver's parameters (default: 0.0005)")
    parser.add_argument('--sender_entropy_coeff', type=float, default=0.1)
    parser.add_argument('--receiver_entropy_coeff', type=float, default=0)
    parser.add_argument('--length_cost', type=float, default=0.05)
    parser.add_argument('--seed', type=int, default=171, help="Random seed")
    parser.add_argument('--pretrain', action='store_true', default=False, help="")
    parser.add_argument('--config', type=str, default=None)

    args = core.init(parser)
    print(args)
    return args
def get_params():
    """Build and parse command-line options.

    Help strings corrected to match the actual defaults (n_features 5,
    learning rates 0.0002) and a missing closing parenthesis added to the
    n_attributes help.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_features', type=int, default=5,
                        help='Dimensionality of the "concept" space (default: 5)')
    parser.add_argument('--n_attributes', type=int, default=2,
                        help='Number of attributes (default: 2)')
    parser.add_argument('--seed', type=int, default=171, help="Random seed")
    parser.add_argument('--config', type=str, default=None)
    parser.add_argument('--visual', action='store_true', default=False,
                        help="Use visual input instead of one-hot vectors")

    # Agent architecture
    parser.add_argument('--sender_hidden', type=int, default=200,
                        help='Size of the hidden layer of Sender (default: 200)')
    parser.add_argument('--receiver_hidden', type=int, default=200,
                        help='Size of the hidden layer of Receiver (default: 200)')
    parser.add_argument('--sender_embedding', type=int, default=50,
                        help='Dimensionality of the embedding hidden layer for Sender (default: 50)')
    parser.add_argument('--receiver_embedding', type=int, default=50,
                        help='Dimensionality of the embedding hidden layer for Receiver (default: 50)')
    parser.add_argument('--rnn_cell', type=str, default='rnn')
    parser.add_argument('--sender_lr', type=float, default=0.0002,
                        help="Learning rate for Sender's parameters (default: 0.0002)")
    parser.add_argument('--receiver_lr', type=float, default=0.0002,
                        help="Learning rate for Receiver's parameters (default: 0.0002)")

    args = core.init(parser)
    print(args)
    return args
def get_params():
    """Parse command-line options for the variable-length message game."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_data', type=str, default=None,
                        help='Path to the train data')
    parser.add_argument('--validation_data', type=str, default=None,
                        help='Path to the validation data')
    parser.add_argument('--dump_data', type=str, default=None,
                        help='Path to the data for which to produce output information')
    parser.add_argument('--dump_output', type=str, default=None,
                        help='Path for dumping output information')
    parser.add_argument('--batches_per_epoch', type=int, default=1000,
                        help='Number of batches per epoch (default: 1000)')
    parser.add_argument('--sender_hidden', type=int, default=10,
                        help='Size of the hidden layer of Sender (default: 10)')
    parser.add_argument('--receiver_hidden', type=int, default=10,
                        help='Size of the hidden layer of Receiver (default: 10)')
    parser.add_argument('--sender_embedding', type=int, default=10,
                        help='Dimensionality of the embedding hidden layer for Sender (default: 10)')
    parser.add_argument('--receiver_embedding', type=int, default=10,
                        help='Dimensionality of the embedding hidden layer for Receiver (default: 10)')
    parser.add_argument('--sender_cell', type=str, default='rnn',
                        help='Type of the cell used for Sender {rnn, gru, lstm} (default: rnn)')
    parser.add_argument('--receiver_cell', type=str, default='rnn',
                        help='Type of the cell used for Receiver {rnn, gru, lstm} (default: rnn)')
    parser.add_argument('--sender_layers', type=int, default=1,
                        help="Number of layers in Sender's RNN (default: 1)")
    parser.add_argument('--receiver_layers', type=int, default=1,
                        help="Number of layers in Receiver's RNN (default: 1)")
    parser.add_argument('--sender_entropy_coeff', type=float, default=1e-2,
                        help='The entropy regularisation coefficient for Sender (default: 1e-2)')
    parser.add_argument('--receiver_entropy_coeff', type=float, default=1e-2,
                        help='The entropy regularisation coefficient for Receiver (default: 1e-2)')
    parser.add_argument('--sender_lr', type=float, default=1e-1,
                        help="Learning rate for Sender's parameters (default: 1e-1)")
    parser.add_argument('--receiver_lr', type=float, default=1e-1,
                        help="Learning rate for Receiver's parameters (default: 1e-1)")
    parser.add_argument('--temperature', type=float, default=1.0,
                        help="GS temperature for the sender (default: 1.0)")
    parser.add_argument('--train_mode', type=str, default='gs',
                        help="Selects whether GumbelSoftmax or Reinforce is used"
                             "(default: gs)")
    parser.add_argument('--n_classes', type=int, default=None,
                        help='Number of classes for Receiver to output. If not set, is automatically deduced from '
                             'the training set')
    parser.add_argument('--force_eos', action='store_true', default=False,
                        help="When set, forces that the last symbol of the message is EOS (default: False)")
    args = core.init(parser)
    return args
def get_params(params):
    """Parse command-line options for the RNN/Transformer concept game.

    Fixed a copy-paste error: the --sender_num_layers help said "receiver";
    also fixed the "defautl" typo in the early-stopping help.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_features', type=int, default=10,
                        help='Dimensionality of the "concept" space (default: 10)')
    parser.add_argument('--batches_per_epoch', type=int, default=1000,
                        help='Number of batches per epoch (default: 1000)')
    parser.add_argument('--dim_dataset', type=int, default=10240,
                        help='Dim of constructing the data (default: 10240)')
    parser.add_argument('--force_eos', type=int, default=0,
                        help='Force EOS at the end of the messages (default: 0)')
    parser.add_argument('--sender_hidden', type=int, default=10,
                        help='Size of the hidden layer of Sender (default: 10)')
    parser.add_argument('--receiver_hidden', type=int, default=10,
                        help='Size of the hidden layer of Receiver (default: 10)')
    parser.add_argument('--receiver_num_layers', type=int, default=1,
                        help='Number hidden layers of receiver. Only in reinforce (default: 1)')
    parser.add_argument('--sender_num_layers', type=int, default=1,
                        help='Number hidden layers of sender. Only in reinforce (default: 1)')
    parser.add_argument('--receiver_num_heads', type=int, default=8,
                        help='Number of attention heads for Transformer Receiver (default: 8)')
    parser.add_argument('--sender_num_heads', type=int, default=8,
                        help='Number of self-attention heads for Transformer Sender (default: 8)')
    parser.add_argument('--sender_embedding', type=int, default=10,
                        help='Dimensionality of the embedding hidden layer for Sender (default: 10)')
    parser.add_argument('--receiver_embedding', type=int, default=10,
                        help='Dimensionality of the embedding hidden layer for Receiver (default: 10)')
    parser.add_argument('--causal_sender', default=False, action='store_true')
    parser.add_argument('--causal_receiver', default=False, action='store_true')
    parser.add_argument('--sender_generate_style', type=str, default='in-place',
                        choices=['standard', 'in-place'],
                        help='How the next symbol is generated within the TransformerDecoder (default: in-place)')
    parser.add_argument('--sender_cell', type=str, default='rnn',
                        help='Type of the cell used for Sender {rnn, gru, lstm, transformer} (default: rnn)')
    parser.add_argument('--receiver_cell', type=str, default='rnn',
                        help='Type of the model used for Receiver {rnn, gru, lstm, transformer} (default: rnn)')
    parser.add_argument('--sender_entropy_coeff', type=float, default=1e-1,
                        help='The entropy regularisation coefficient for Sender (default: 1e-1)')
    parser.add_argument('--receiver_entropy_coeff', type=float, default=1e-1,
                        help='The entropy regularisation coefficient for Receiver (default: 1e-1)')
    parser.add_argument('--probs', type=str, default='uniform',
                        help="Prior distribution over the concepts (default: uniform)")
    parser.add_argument('--length_cost', type=float, default=0.0,
                        help="Penalty for the message length, each symbol would before <EOS> would be "
                             "penalized by this cost (default: 0.0)")
    parser.add_argument('--name', type=str, default='model',
                        help="Name for your checkpoint (default: model)")
    parser.add_argument('--early_stopping_thr', type=float, default=0.98,
                        help="Early stopping threshold on accuracy (default: 0.98)")
    args = core.init(parser, params)
    return args
def get_params(params):
    """Parse command-line options for the compositionality game.

    Fixed the "defautl" typo and the hidden-size help strings that claimed
    "default: 10" while the actual defaults are 50.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--n_attributes", type=int, default=4, help="")
    parser.add_argument("--n_values", type=int, default=4, help="")
    parser.add_argument("--data_scaler", type=int, default=100)
    parser.add_argument("--stats_freq", type=int, default=0)
    parser.add_argument(
        "--baseline", type=str, choices=["no", "mean", "builtin"], default="mean"
    )
    parser.add_argument(
        "--density_data", type=int, default=0, help="no sampling if equal 0"
    )
    parser.add_argument(
        "--sender_hidden",
        type=int,
        default=50,
        help="Size of the hidden layer of Sender (default: 50)",
    )
    parser.add_argument(
        "--receiver_hidden",
        type=int,
        default=50,
        help="Size of the hidden layer of Receiver (default: 50)",
    )
    parser.add_argument(
        "--sender_entropy_coeff",
        type=float,
        default=1e-2,
        help="Entropy regularisation coeff for Sender (default: 1e-2)",
    )
    parser.add_argument("--sender_cell", type=str, default="rnn")
    parser.add_argument("--receiver_cell", type=str, default="rnn")
    parser.add_argument(
        "--sender_emb",
        type=int,
        default=10,
        help="Size of the embeddings of Sender (default: 10)",
    )
    parser.add_argument(
        "--receiver_emb",
        type=int,
        default=10,
        help="Size of the embeddings of Receiver (default: 10)",
    )
    parser.add_argument(
        "--early_stopping_thr",
        type=float,
        default=0.99999,
        help="Early stopping threshold on accuracy (default: 0.99999)",
    )
    args = core.init(arg_parser=parser, params=params)
    return args
def test_temperature_updater_callback():
    """TemperatureUpdater decays the sender's GS temperature once per epoch."""
    core.init()
    sender = core.GumbelSoftmaxWrapper(ToyAgent(), temperature=1)
    receiver = Receiver()

    # Named function instead of an assigned lambda (PEP 8 / E731).
    def loss(sender_input, message, receiver_input, receiver_output, labels):
        return F.cross_entropy(receiver_output, labels), {}

    game = core.SymbolGameGS(sender, receiver, loss)
    optimizer = torch.optim.Adam(game.parameters())
    data = Dataset()
    trainer = core.Trainer(
        game, optimizer, train_data=data,
        validation_data=None,
        callbacks=[core.TemperatureUpdater(agent=sender, decay=0.9)])
    trainer.train(1)

    # After one epoch: 1 * 0.9
    assert sender.temperature == 0.9
def main(params):
    """Train a one-symbol Gumbel-Softmax game on MNIST with an annealed temperature."""
    # initialize the egg lib; common options (batch/vocab size, etc.) are
    # pre-defined — see egg/core/util.py for the full list
    opts = core.init(params=params)

    # prepare the dataset
    loader_kwargs = {"num_workers": 1, "pin_memory": True} if opts.cuda else {}
    transform = transforms.ToTensor()
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST("./data", train=True, download=True, transform=transform),
        batch_size=opts.batch_size, shuffle=True, **loader_kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST("./data", train=False, transform=transform),
        batch_size=opts.batch_size, shuffle=True, **loader_kwargs)

    # initialize the agents: the sender's "data" transform is wrapped into a
    # GS interface, the receiver consumes a single symbol
    sender = core.GumbelSoftmaxWrapper(Sender(opts.vocab_size), temperature=1.0)
    receiver = core.SymbolReceiverWrapper(
        Receiver(), vocab_size=opts.vocab_size, agent_input_size=400)

    # a standard Sender/Receiver game with 1-symbol communication
    game = core.SymbolGameGS(sender, receiver, loss)

    # called at the end of each epoch by the Trainer; reduces the GS
    # sampling temperature
    temperature_updater = core.TemperatureUpdater(
        agent=sender, decay=0.75, minimum=0.01)

    # optimizer set up from common command-line parameters; defaults to Adam
    optimizer = core.build_optimizer(game.parameters())

    # initialize and launch the trainer
    trainer = core.Trainer(
        game=game,
        optimizer=optimizer,
        train_data=train_loader,
        validation_data=test_loader,
        callbacks=[
            temperature_updater,
            core.ConsoleLogger(as_json=True, print_train_loss=True),
        ],
    )
    trainer.train(n_epochs=opts.n_epochs)
    core.close()
def get_opts(params):
    """Parse game options, delegating most flags to get_other_opts."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--pdb", action="store_true", default=False,
                        help="Run the game with pdb enabled")
    get_other_opts(parser)
    return core.init(arg_parser=parser, params=params)