Example 1
def eval(model_path):
    vector_length = 8
    memory_size = (128, 20)
    hidden_layer_size = 100
    lstm_controller = not args.ff

    model = NTM(vector_length, hidden_layer_size, memory_size, lstm_controller)

    print(f"Loading model from {model_path}")
    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(checkpoint)
    model.eval()

    lengths = [20, 100]
    for sequence_length in lengths:
        input, target = get_training_sequence(sequence_length, sequence_length,
                                              vector_length)
        state = model.get_initial_state()
        # feed the full input sequence (delimiter included) so the model can write it to memory
        for vector in input:
            _, state = model(vector, state)
        # then read the copy back out by presenting zero vectors of the same width
        y_out = torch.zeros(target.size())
        for j in range(len(target)):
            y_out[j], state = model(torch.zeros(1, vector_length + 1), state)
        y_out_binarized = y_out.clone().data
        y_out_binarized.apply_(lambda x: 0 if x < 0.5 else 1)

        plot_copy_results(target, y_out, vector_length)
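Both eval above and train in Example 6 call a get_training_sequence helper that is not shown here. The following is only a hypothetical sketch of such a copy-task generator, reconstructed from the shapes the loops imply (a sequence of random bit vectors plus one extra delimiter channel, hence the vector_length + 1 input width); the original helper may differ.

# Hypothetical reconstruction, not the original helper: random binary copy-task
# data shaped (steps, batch, channels) so it can be iterated vector by vector.
import random
import torch

def get_training_sequence(min_length, max_length, vector_length, batch_size=1):
    sequence_length = random.randint(min_length, max_length)
    # Target: the bit vectors the NTM has to reproduce.
    target = torch.bernoulli(
        torch.full((sequence_length, batch_size, vector_length), 0.5))
    # Input: the same bits plus an extra channel carrying the end-of-sequence delimiter.
    input = torch.zeros(sequence_length + 1, batch_size, vector_length + 1)
    input[:sequence_length, :, :vector_length] = target
    input[sequence_length, :, vector_length] = 1.0
    return input, target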
Example 2
# if we use lr schedule, we need to keep track of errors over time
if args.lr_rate is not None:
  print "Using lr schedule rate of:", args.lr_rate
  errors = {}
  error_sum = 0

# build a fresh model from the CLI params, or deserialize a saved one if a path was given
if args.model is None:
  # If not using a saved model, initialize from params
  vec_size = args.vec_size
  seq = SequenceGen(args.task, vec_size, args.hi, args.lo)
  hidden_size = args.units # Size of hidden layer of neurons
  N = args.N # number of memory locations
  M = args.M # size of a memory location
  heads = args.heads
  model = NTM(seq.in_size, seq.out_size, hidden_size, N, M, vec_size, heads)
else:
  # otherwise, load the model from specified file
  print "Using saved model:", args.model
  model = deserialize(args.model)
  vec_size = model.vec_size # vec size comes from model
  seq = SequenceGen(args.task, vec_size, args.hi, args.lo)

# An object that keeps the optimizer state during training
optimizer = RMSProp(model.W)

n = 0 # counts the number of sequences trained on
bpc = None # keeps track of trailing bpc (cost)

while n < 100:
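The loop body is truncated at this point, and the custom RMSProp class constructed from model.W above is not shown. A rough sketch only, assuming model.W is a dict of numpy weight arrays and that gradients arrive as a dict with matching keys; the method name update_weights is made up for illustration.

# Hypothetical per-parameter RMSProp, not the repository's implementation.
import numpy as np

class RMSProp(object):

    def __init__(self, W, learning_rate=1e-4, decay=0.95, eps=1e-6):
        self.lr = learning_rate
        self.decay = decay
        self.eps = eps
        # Running average of squared gradients, one slot per parameter.
        self.cache = {k: np.zeros_like(v) for k, v in W.items()}

    def update_weights(self, W, grads):
        for k in W:
            self.cache[k] = self.decay * self.cache[k] + (1 - self.decay) * grads[k] ** 2
            W[k] -= self.lr * grads[k] / (np.sqrt(self.cache[k]) + self.eps)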
Example 3
# if we use lr schedule, we need to keep track of errors over time
if args.lr_rate is not None:
  print "Using lr schedule rate of:", args.lr_rate
  errors = {}
  error_sum = 0

# build a fresh model from the CLI params, or deserialize a saved one if a path was given
if args.model is None:
  # If not using a saved model, initialize from params
  vec_size = args.vec_size
  seq = SequenceGen(args.task, vec_size, args.hi, args.lo)
  hidden_size = args.units # Size of hidden layer of neurons
  N = args.N # number of memory locations
  M = args.M # size of a memory location
  heads = args.heads
  model = NTM(seq.in_size, seq.out_size, hidden_size, N, M, vec_size, heads)
else:
  # otherwise, load the model from specified file
  print "Using saved model:", args.model
  model = deserialize(args.model)
  vec_size = model.vec_size # vec size comes from model
  seq = SequenceGen(args.task, vec_size, args.hi, args.lo)

# An object that keeps the optimizer state during training
optimizer = RMSProp(model.W)

n = 0 # counts the number of sequences trained on
bpc = None # keeps track of trailing bpc (cost)

# train forever
while True:
Example 4
# if we use lr schedule, we need to keep track of errors over time
if args.lr_rate is not None:
    print "Using lr schedule rate of:", args.lr_rate
    errors = {}
    error_sum = 0

# build a fresh model from the CLI params, or deserialize a saved one if a path was given
if args.model is None:
    # If not using a saved model, initialize from params
    vec_size = args.vec_size
    seq = SequenceGen(args.task, vec_size, args.hi, args.lo)
    hidden_size = args.units  # Size of hidden layer of neurons
    N = args.N  # number of memory locations
    M = args.M  # size of a memory location
    heads = args.heads
    model = NTM(seq.in_size, seq.out_size, hidden_size, N, M, vec_size, heads)
else:
    # otherwise, load the model from specified file
    print "Using saved model:", args.model
    model = deserialize(args.model)
    vec_size = model.vec_size  # vec size comes from model
    seq = SequenceGen(args.task, vec_size, args.hi, args.lo)

# An object that keeps the optimizer state during training
optimizer = RMSProp(model.W)

n = 0  # counts the number of sequences trained on
bpc = None  # keeps track of trailing bpc (cost)

while n < 100:
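The deserialize helper used in the else branch above (and the matching serialize it implies) is not shown either. A minimal pickle-based sketch follows; the original repository may store models differently.

# Hypothetical pickle-based model (de)serialization, for illustration only.
import pickle

def serialize(model, path):
    with open(path, "wb") as f:
        pickle.dump(model, f)

def deserialize(path):
    with open(path, "rb") as f:
        return pickle.load(f)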
Example 5
                    help="The number of out bits")

# Tensorflow checkpoints and tensorboard
parser.add_argument('--checkpoint_dir', action="store", dest="checkpoint_dir", default='./tf_ntm_ckpt/',
                    help="The location to save the checkpoint")
parser.add_argument('--max_to_keep', action="store", dest="max_to_keep", default=3, type=int,
                    help="Maximum number of checkpoint to keep")
parser.add_argument('--report_interval', action="store", dest="report_interval", default=10, type=int,
                    help="The report interval for the train information")
parser.add_argument('--train_log_dir', action="store", dest="train_log_dir", default='./tf_ntm_logs/gradient_tape/',
                    help="The location to save the training logs")

arg = parser.parse_args()

# Training
ntm_model = NTM(arg.controller_size, arg.memory_locations, arg.memory_vector_size, arg.maximum_shifts,
                arg.out_bits, arg.learn_r_bias, arg.learn_w_bias, arg.learn_m_bias)

optimizer = tf.optimizers.RMSprop(learning_rate=arg.learning_rate, momentum=arg.momentum)
bce_loss = tf.losses.BinaryCrossentropy()

# Training metrics
train_loss = tf.metrics.Mean(name="train_loss")
train_cost = tf.metrics.Mean(name="train_cost")

# Tensorboard
# tensorboard --logdir tf_ntm_logs/gradient_tape
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = arg.train_log_dir + current_time + '/train'

# Checkpoints
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=ntm_model)
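The snippet stops after creating the checkpoint object. Below is only a sketch of how these pieces are commonly wired together in TF2; the data arguments and the call signature ntm_model(x_batch) are assumptions, not taken from the original script.

# Illustrative wiring only; the real training step may differ.
summary_writer = tf.summary.create_file_writer(train_log_dir)
ckpt_manager = tf.train.CheckpointManager(ckpt, arg.checkpoint_dir,
                                          max_to_keep=arg.max_to_keep)

@tf.function
def train_step(x_batch, y_batch):
    with tf.GradientTape() as tape:
        # assumed: the model maps input sequences directly to output bit probabilities
        y_pred = ntm_model(x_batch)
        loss = bce_loss(y_batch, y_pred)
    grads = tape.gradient(loss, ntm_model.trainable_variables)
    optimizer.apply_gradients(zip(grads, ntm_model.trainable_variables))
    train_loss(loss)
    return loss

Scalars would then typically be logged every arg.report_interval steps with tf.summary.scalar inside summary_writer.as_default(), and ckpt_manager.save() called periodically after ckpt.step.assign_add(1).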
Example 6
def train(epochs=50_000):
    tensorboard_log_folder = f"runs/copy-task-{datetime.now().strftime('%Y-%m-%dT%H%M%S')}"
    writer = SummaryWriter(tensorboard_log_folder)
    print(f"Training for {epochs} epochs, logging in {tensorboard_log_folder}")
    sequence_min_length = 1
    sequence_max_length = 20
    vector_length = 8
    memory_size = (128, 20)
    hidden_layer_size = 100
    batch_size = 4
    lstm_controller = not args.ff

    writer.add_scalar("sequence_min_length", sequence_min_length)
    writer.add_scalar("sequence_max_length", sequence_max_length)
    writer.add_scalar("vector_length", vector_length)
    writer.add_scalar("memory_size0", memory_size[0])
    writer.add_scalar("memory_size1", memory_size[1])
    writer.add_scalar("hidden_layer_size", hidden_layer_size)
    writer.add_scalar("lstm_controller", lstm_controller)
    writer.add_scalar("seed", seed)
    writer.add_scalar("batch_size", batch_size)

    model = NTM(vector_length, hidden_layer_size, memory_size, lstm_controller)

    optimizer = optim.RMSprop(model.parameters(),
                              momentum=0.9,
                              alpha=0.95,
                              lr=1e-4)
    feedback_frequency = 100
    total_loss = []
    total_cost = []

    os.makedirs("models", exist_ok=True)
    if os.path.isfile(model_path):
        print(f"Loading model from {model_path}")
        checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint)

    for epoch in range(epochs + 1):
        optimizer.zero_grad()
        input, target = get_training_sequence(sequence_min_length,
                                              sequence_max_length,
                                              vector_length, batch_size)
        state = model.get_initial_state(batch_size)
        for vector in input:
            _, state = model(vector, state)
        y_out = torch.zeros(target.size())
        for j in range(len(target)):
            y_out[j], state = model(torch.zeros(batch_size, vector_length + 1),
                                    state)
        loss = F.binary_cross_entropy(y_out, target)
        loss.backward()
        optimizer.step()
        total_loss.append(loss.item())
        y_out_binarized = y_out.clone().data
        y_out_binarized.apply_(lambda x: 0 if x < 0.5 else 1)
        # copy-task cost: absolute bit errors after thresholding, normalised by sequence length
        cost = torch.sum(torch.abs(y_out_binarized - target)) / len(target)
        total_cost.append(cost.item())
        if epoch % feedback_frequency == 0:
            running_loss = sum(total_loss) / len(total_loss)
            running_cost = sum(total_cost) / len(total_cost)
            print(f"Loss at step {epoch}: {running_loss}")
            writer.add_scalar('training loss', running_loss, epoch)
            writer.add_scalar('training cost', running_cost, epoch)
            total_loss = []
            total_cost = []

    torch.save(model.state_dict(), model_path)
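Both train above and eval in Example 1 refer to globals (args, seed, model_path) that are defined elsewhere in their script. A hypothetical driver is sketched below, assuming both functions live in the same module; every flag except --ff, and every default value, is an assumption.

# Hypothetical entry point; only the --ff flag is taken from the snippets above.
import argparse
import random
import torch

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--ff", action="store_true",
                        help="use a feed-forward controller instead of an LSTM")
    parser.add_argument("--eval", action="store_true",
                        help="evaluate a saved model instead of training")
    args = parser.parse_args()

    # fix the seed so runs are repeatable
    seed = 1
    random.seed(seed)
    torch.manual_seed(seed)

    model_path = "models/copy.pt"
    if args.eval:
        eval(model_path)
    else:
        train()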