Example #1
def main():
    story_limit = 150
    epoch_batches_count = 64
    epochs_count = 1024
    lr = 1e-11
    optim = 1
    starting_epoch = -1
    bs = 32
    pgd = PreGenData(bs)

    task_dir = os.path.dirname(abspath(__file__))
    processed_data_dir = join(task_dir, 'data', "processed")
    lexicon_dictionary = pickle.load(
        open(join(processed_data_dir, 'lexicon-dict.pkl'), 'rb'))
    x = len(lexicon_dictionary)

    computer = DNC(x=x, v_t=x, bs=bs, W=64, L=64, R=32, h=256)
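    # Assumed meanings for this repo's DNC signature: x/v_t = input/output
    # vocabulary size, W = memory cell width, L = memory slots, R = read heads,
    # h = controller hidden size.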

    # if load model
    # computer, optim, starting_epoch = load_model(computer)

    computer = computer.cuda()
    if optim is None:
        optimizer = torch.optim.Adam(computer.parameters(), lr=lr)
    else:
        print('using Adadelta optimizer with learning rate', lr)
        optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)

    # starting with the epoch after the loaded one
    train(computer, optimizer, story_limit, bs, pgd, x,
          int(starting_epoch) + 1, epochs_count, epoch_batches_count)
Example #2
def test_rnn_no_memory_pass():
    T.manual_seed(1111)

    input_size = 100
    hidden_size = 100
    rnn_type = 'gru'
    num_layers = 3
    num_hidden_layers = 5
    dropout = 0.2
    nr_cells = 12
    cell_size = 17
    read_heads = 3
    gpu_id = -1
    debug = True
    lr = 0.001
    sequence_max_length = 10
    batch_size = 10
    cuda = gpu_id
    clip = 20
    length = 13

    rnn = DNC(input_size=input_size,
              hidden_size=hidden_size,
              rnn_type=rnn_type,
              num_layers=num_layers,
              num_hidden_layers=num_hidden_layers,
              dropout=dropout,
              nr_cells=nr_cells,
              cell_size=cell_size,
              read_heads=read_heads,
              gpu_id=gpu_id,
              debug=debug)

    optimizer = optim.Adam(rnn.parameters(), lr=lr)
    optimizer.zero_grad()

    input_data, target_output = generate_data(batch_size, length, input_size,
                                              cuda)
    target_output = target_output.transpose(0, 1).contiguous()

    (chx, mhx, rv) = (None, None, None)
    outputs = []
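    # Feed the same input six times, threading the returned state
    # (controller hidden, memory hidden, read vectors) through each call;
    # with pass_through_memory=False the memory is bypassed, so rv stays None.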
    for x in range(6):
        output, (chx, mhx, rv), v = rnn(input_data, (chx, mhx, rv),
                                        pass_through_memory=False)
        output = output.transpose(0, 1)
        outputs.append(output)

    output = functools.reduce(lambda x, y: x + y, outputs)
    loss = criterion(output, target_output)
    loss.backward()

    T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
    optimizer.step()

    assert target_output.size() == T.Size([27, 10, 100])
    assert chx[0].size() == T.Size([num_hidden_layers, 10, 100])
    assert mhx['memory'].size() == T.Size([10, 12, 17])
    assert rv is None
Example #3
def test_rnn_n():
  T.manual_seed(1111)

  input_size = 100
  hidden_size = 100
  rnn_type = 'rnn'
  num_layers = 3
  num_hidden_layers = 5
  dropout = 0.2
  nr_cells = 12
  cell_size = 17
  read_heads = 3
  gpu_id = -1
  debug = True
  lr = 0.001
  sequence_max_length = 10
  batch_size = 10
  cuda = gpu_id
  clip = 20
  length = 13

  rnn = DNC(
      input_size=input_size,
      hidden_size=hidden_size,
      rnn_type=rnn_type,
      num_layers=num_layers,
      num_hidden_layers=num_hidden_layers,
      dropout=dropout,
      nr_cells=nr_cells,
      cell_size=cell_size,
      read_heads=read_heads,
      gpu_id=gpu_id,
      debug=debug
  )
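  # With debug=True the forward pass also returns a dict of internal memory
  # signals, bound to `v` below.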

  optimizer = optim.Adam(rnn.parameters(), lr=lr)
  optimizer.zero_grad()

  input_data, target_output = generate_data(batch_size, length, input_size, cuda)
  target_output = target_output.transpose(0, 1).contiguous()

  output, (chx, mhx, rv), v = rnn(input_data, None)
  output = output.transpose(0, 1)

  loss = criterion(output, target_output)
  loss.backward()

  T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
  optimizer.step()

  assert target_output.size() == T.Size([27, 10, 100])
  assert chx[1].size() == T.Size([num_hidden_layers, 10, 100])
  assert mhx['memory'].size() == T.Size([10, 12, 17])
  assert rv.size() == T.Size([10, 51])
Example #4
    def _build_encoder_cell(self,
                            hparams,
                            num_layers,
                            num_residual_layers,
                            base_gpu=0):
        """Build a multi-layer RNN cell that can be used by encoder."""

        if hparams.model == 'model3':
            if hparams.mann == 'ntm':
                return NTMCell(hparams.num_layers,
                               hparams.num_units,
                               use_att_memory=False,
                               att_memory=False,
                               att_memory_size=None,
                               att_memory_vector_dim=None,
                               use_ext_memory=True,
                               ext_memory_size=hparams.num_memory_locations,
                               ext_memory_vector_dim=hparams.memory_unit_size,
                               ext_read_head_num=hparams.read_heads,
                               ext_write_head_num=hparams.write_heads,
                               dropout=hparams.dropout,
                               batch_size=hparams.batch_size,
                               mode=self.mode,
                               shift_range=1,
                               output_dim=hparams.num_units,
                               reuse=False,
                               record_w_history=hparams.record_w_history)
            elif hparams.mann == 'dnc':
                access_config = {
                    'memory_size': hparams.num_memory_locations,
                    'word_size': hparams.memory_unit_size,
                    'num_reads': hparams.read_heads,
                    'num_writes': hparams.write_heads
                }
                controller_config = {
                    'num_units': hparams.num_units,
                    'num_layers': hparams.num_layers
                }

                return DNC(access_config, controller_config, hparams.num_units,
                           20, hparams.dropout, self.mode, hparams.batch_size)
        else:
            return model_helper.create_rnn_cell(
                unit_type=hparams.unit_type,
                num_units=hparams.num_units,
                num_layers=num_layers,
                num_residual_layers=num_residual_layers,
                forget_bias=hparams.forget_bias,
                dropout=hparams.dropout,
                num_gpus=hparams.num_gpus,
                mode=self.mode,
                base_gpu=base_gpu,
                single_cell_fn=self.single_cell_fn,
                num_proj=None)
Example #5
def main():
	# Set random seed if given
	torch.manual_seed(RANDOM_SEED or torch.initial_seed())

	# Choose dataset and initialize size of data's input and output
	dataset = RepeatCopy()  # default parameters

	# Initialize DNC
	dnc = DNC(dataset.input_size, dataset.output_size,
		controller_config, memory_config)

	train(dnc, dataset)
Example #6
    def __init__(self, idim, cdim, num_heads, N, M, gpu):
        super(EncoderDNC, self).__init__()

        self.idim = idim
        self.hdim = idim
        self.rnn = DNC(input_size=idim,
                       hidden_size=cdim,
                       nr_cells=N,
                       cell_size=M,
                       read_heads=num_heads,
                       batch_first=False,
                       gpu_id=gpu)
Example #7
	def __init__(self, gpu_id=0, 
				 input_size=1, 
				 output_size=1, 
				 num_layers=4, 
				 hidden_size=128, 
				 rnn_type='gru', 
				 rnn_num_layers=2, 
				 rnn_hidden_size=128):
		super(ConflictMonitoringNet, self).__init__()
		self.gpu_id = gpu_id
		self.input_size = input_size
		self.output_size = output_size
		self.num_layers = num_layers
		self.hidden_size = hidden_size
		self.rnn_type = rnn_type
		self.rnn_num_layers = rnn_num_layers
		self.rnn_hidden_size = rnn_hidden_size
		self.cmd_regulariser = 0
		self.layers = nn.Sequential(
			CMDrop(nn.Conv1d(  input_size, hidden_size, 1), nn.ReLU()),
			CMDrop(nn.Conv1d( hidden_size, hidden_size, 1), nn.ReLU()),
			CMDrop(nn.Conv1d( hidden_size, hidden_size, 1), nn.ReLU()),
			CMDrop(nn.Conv1d( hidden_size, output_size, 1))
			)
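		# The 1x1 convolutions act independently at each time step, mapping
		# input_size channels to output_size channels.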

		if self.rnn_type =='gru':
			self.rnn = nn.GRU(
				input_size=output_size,
				hidden_size=rnn_hidden_size,
				num_layers=rnn_num_layers,
				batch_first=True)
			self.rnn_hidden = None
		elif self.rnn_type == 'dnc':
			self.rnn = DNC(
				input_size=output_size,
				hidden_size=rnn_hidden_size,
				rnn_type='gru',
				num_layers=rnn_num_layers,
				nr_cells=10,
				cell_size=64,
				read_heads=4,
				batch_first=True,
				gpu_id=self.gpu_id)
			self.rnn_hidden = (None, None, None)  # (controller_hidden, memory, read_vectors)

		self.rnn_output_layer = nn.Conv1d(rnn_hidden_size, num_layers, 1)
Example #8
    def __init__(self, vocab_size, emb_dim=64, device=torch.device('cpu:0')):
        super(DMNC, self).__init__()
        K = len(vocab_size)
        self.K = K
        self.vocab_size = vocab_size
        self.device = device

        self.token_start = vocab_size[2]
        self.token_end = vocab_size[2] + 1

        self.embeddings = nn.ModuleList([
            nn.Embedding(vocab_size[i] if i != 2 else vocab_size[2] + 2,
                         emb_dim) for i in range(K)
        ])
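        # The third vocabulary gets two extra embedding rows for the start/end
        # tokens defined above.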
        self.dropout = nn.Dropout(p=0.5)

        self.encoders = nn.ModuleList([
            DNC(input_size=emb_dim,
                hidden_size=emb_dim,
                rnn_type='gru',
                num_layers=1,
                num_hidden_layers=1,
                nr_cells=16,
                cell_size=emb_dim,
                read_heads=1,
                batch_first=True,
                gpu_id=0,
                independent_linears=False) for _ in range(K - 1)
        ])

        self.decoder = nn.GRU(
            emb_dim + emb_dim * 2, emb_dim * 2,
            batch_first=True)  # input: (y, r1, r2,) hidden: (hidden1, hidden2)
        self.interface_weighting = nn.Linear(
            emb_dim * 2, 2 * (emb_dim + 1 + 3))  # 2 read head (key, str, mode)
        self.decoder_r2o = nn.Linear(2 * emb_dim, emb_dim * 2)

        self.output = nn.Linear(emb_dim * 2, vocab_size[2] + 2)
Example #9
    def __init__(self, batch_size, input_seq_length, output_seq_length):

        self._encoder_inputs = [
            tf.placeholder(tf.float32,
                           shape=[batch_size, data_point_dim],
                           name='inputs_{}'.format(i))
            for i in range(input_seq_length)
        ]
        self._labels = [
            tf.placeholder(tf.float32,
                           shape=[batch_size, data_point_dim],
                           name='labels_{}'.format(i))
            for i in range(output_seq_length)
        ]
        self._decoder_inputs = [
            tf.zeros_like(self._encoder_inputs[0], dtype=tf.float32, name='GO')
        ] + self._labels[:-1]
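        # Decoder inputs: a zero GO frame followed by the labels shifted right
        # by one step (teacher forcing).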
        rnn_cell = DNC(access_config, controller_config, data_point_dim,
                       clip_value)

        model_outputs, states = legacy_seq2seq.tied_rnn_seq2seq(
            self._encoder_inputs,
            self._decoder_inputs,
            rnn_cell,
            loop_function=lambda prev, _: prev)
        self._batch_size = batch_size
        self._input_seq_length = input_seq_length
        self._output_seq_length = output_seq_length
        self._squashed_output = tf.nn.softmax(model_outputs)
        self._cost = loss_function(tf.reshape(self._squashed_output, [-1]),
                                   tf.reshape(self._labels, [-1]))
        self._step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(
            self._cost)
        self._session = tf.Session()

        init = tf.global_variables_initializer()
        self._session.run(init)
Example #10
from tasks import CopyTask, RepeatCopyTask, AndTask, XorTask, MergeTask
from utils import *

INPUT_SIZE = 8
BATCH_SIZE = 32

memory = Memory(25, 6)
memory.add_head(NTMReadHead, shifts=[-1, 0, 1])
memory.add_head(NTMReadHead, shifts=[-1, 0, 1])
memory.add_head(NTMWriteHead, shifts=[-1, 0, 1])

input = tf.placeholder(tf.float32, shape=(None, None, INPUT_SIZE+2))
#lstm  = tf.nn.rnn_cell.MultiRNNCell([LSTMCell(256) for i in range(3)])
lstm  = LSTMCell(100)

net = DNC(input, memory, INPUT_SIZE+2, controller = lstm, log_memory=True)
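# net is assumed to unpack as (output, state, memory_logs); log_memory=True
# adds the per-key memory images visualized in the summaries below.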
targets = tf.placeholder(dtype=tf.float32, shape=[None, None, INPUT_SIZE+2])
mask = tf.placeholder(dtype=tf.float32, shape=[None, None, INPUT_SIZE+2])
output  = net[0]
loss = tf.losses.sigmoid_cross_entropy(logits=output, weights=mask, multi_class_labels=targets)
cost = tf.reduce_sum( mask*((1 - targets * (1 - tf.exp(-output))) * tf.sigmoid(output)) ) / BATCH_SIZE

opt = tf.train.RMSPropOptimizer(1e-4, momentum=0.9)
train = minimize_and_clip(opt, loss)

img_summary = [tf.summary.image(key, concate_to_image(net[2][key]), max_outputs=1) for key in net[2]]
img_summary +=[tf.summary.image("IO/input", concate_to_image(input), max_outputs=1)]
img_summary +=[tf.summary.image("IO/targets", concate_to_image(targets), max_outputs=1)]
img_summary +=[tf.summary.image("IO/output", tf.sigmoid(concate_to_image(net[0])), max_outputs=1)]
img_summary +=[tf.summary.image("IO/output x mask", concate_to_image(tf.sigmoid(net[0])*mask), max_outputs=1)]
img_summary = tf.summary.merge(img_summary)
Example #11
    # Step 01: Load configuration
    try:
        with open(CONFIG_FILE, 'r') as fp:
            config = attrdict.AttrDict(yaml.safe_load(fp))
    except IOError:
        log.error('Could not load configuration file: {}'.format(CONFIG_FILE))
        sys.exit(1)

    # Step 02: Load the training and testing data
    log.info('Loading data')
    try:
        data = BabiDatasetLoader.load(
            config.data.cache_dir,
            config.data.data_dir,
            config.dataset
        )
        if not data:
            log.error('Could not load or reprocess the data. Aborting')
            sys.exit(1)
    except IOError as exc:
        log.error('Failed to load the bAbI data set')
        print(exc)
        sys.exit(1)

    # Step 03: Train the model
    dnc = DNC(data, config.model)
    dnc.build(config.model)


Example #12
def sum2_task_single(args):
    dirname = os.path.dirname(
        os.path.abspath(__file__)) + '/data/save/sum2/{}'.format(args.name)
    if not os.path.isdir(dirname):
        os.mkdir(dirname)
    print(dirname)
    ckpts_dir = dirname
    batch_size = 50

    vocab_lower = 2
    vocab_upper = 150
    length_from = 1
    length_to = args.seq_len

    input_size = vocab_upper
    output_size = vocab_upper

    words_count = 32
    word_size = 64
    read_heads = 1

    iterations = args.num_iter
    start_step = 0

    sequence_max_length = 100
    if args.mode == 'train':
        ntest = 10
    else:
        ntest = 50

    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            ncomputer = DNC(StatelessRecurrentController,
                            input_size,
                            output_size,
                            sequence_max_length,
                            words_count,
                            word_size,
                            read_heads,
                            batch_size,
                            hidden_controller_dim=args.hidden_dim,
                            use_mem=args.use_mem,
                            dual_emb=True,
                            decoder_mode=True,
                            dual_controller=True,
                            write_protect=True)

            output, prob, loss, apply_gradients = ncomputer.build_loss_function_mask(
            )

            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.global_variables_initializer())
            if args.mode == 'test':
                ncomputer.restore(session, ckpts_dir, ncomputer.print_config())
                iterations = 1

            llprint("Done!\n")

            last_100_losses = []

            start = 0 if start_step == 0 else start_step + 1
            end = start_step + iterations + 1
            minloss = 1000
            start_time_100 = time.time()
            end_time_100 = None
            avg_100_time = 0.
            avg_counter = 0
            if args.mode == 'train':
                train_writer = tf.summary.FileWriter(
                    './data/summary/sum2/{}/'.format(ncomputer.print_config()),
                    session.graph)
            for i in range(start, end + 1):
                try:
                    llprint("\rIteration %d/%d" % (i, end))
                    if args.mode == 'train':
                        input_vec, output_vec, seq_len, decoder_point, masks, all_ose \
                            = sum2_sample_single_batch(vocab_lower, vocab_upper // 3, length_from, length_to,
                                                       vocab_size=vocab_upper, bs=batch_size)

                        loss_value, _ = session.run(
                            [loss, apply_gradients],
                            feed_dict={
                                ncomputer.input_data: input_vec,
                                ncomputer.target_output: output_vec,
                                ncomputer.sequence_length: seq_len,
                                ncomputer.decoder_point: decoder_point,
                                ncomputer.mask: masks
                            })
                        last_100_losses.append(loss_value)

                    summarize = (i % 100 == 0)

                    if summarize:
                        llprint(
                            "\n\t episode %d -->Avg. Cross-Entropy: %.7f\n" %
                            (i, np.mean(last_100_losses)))
                        if args.mode == 'train':
                            summary = tf.Summary()
                            summary.value.add(
                                tag='batch_train_loss',
                                simple_value=np.mean(last_100_losses))
                        trscores = []
                        mloss = 1000
                        for ii in range(ntest):
                            input_vec, output_vec, seq_len, decoder_point, masks, all_ose \
                                = sum2_sample_single_batch(vocab_lower, vocab_upper // 3, length_from, length_to,
                                                           vocab_size=vocab_upper, bs=batch_size)
                            tloss, out = session.run(
                                [loss, prob],
                                feed_dict={
                                    ncomputer.input_data: input_vec,
                                    ncomputer.sequence_length: seq_len,
                                    ncomputer.decoder_point: decoder_point,
                                    ncomputer.target_output: output_vec,
                                    ncomputer.mask: masks
                                })
                            out = np.reshape(np.asarray(out),
                                             [-1, seq_len, vocab_upper])
                            out = np.argmax(out, axis=-1)
                            bout_list = []
                            # print('{} vs {}'.format(seq_len,out.shape[1]))

                            for b in range(out.shape[0]):
                                out_list = []
                                for io in range(decoder_point, out.shape[1]):
                                    if out[b][io] == 0:
                                        break
                                    out_list.append(out[b][io])
                                bout_list.append(out_list)

                            # for io in range(decoder_point, out.shape[1]):
                            #     out_list.append(out[0][io])
                            if tloss < mloss:
                                mloss = tloss
                            trscores.append(
                                exact_acc(np.asarray(all_ose),
                                          np.asarray(bout_list), 0.9))
                        if args.mode == 'train' and mloss < minloss:
                            minloss = mloss
                            print('save model')
                            ncomputer.save(session, ckpts_dir,
                                           ncomputer.print_config())
                        print('test bleu {}'.format(np.mean(trscores)))
                        if args.mode == 'train':
                            summary.value.add(tag='train_bleu',
                                              simple_value=np.mean(trscores))
                            train_writer.add_summary(summary, i)
                            train_writer.flush()

                        end_time_100 = time.time()
                        elapsed_time = (end_time_100 - start_time_100) / 60
                        avg_counter += 1
                        avg_100_time += (1. / avg_counter) * (elapsed_time -
                                                              avg_100_time)
                        estimated_time = (avg_100_time *
                                          ((end - i) / 100.)) / 60.

                        print("\tAvg. 100 iterations time: %.2f minutes" %
                              (avg_100_time))
                        print("\tApprox. time to completion: %.2f hours" %
                              (estimated_time))

                        start_time_100 = time.time()
                        last_100_losses = []

                except KeyboardInterrupt:
                    llprint("\nSaving Checkpoint ... ")
                    sys.exit(0)
Example #13
input = tf.placeholder(tf.float32, shape=(None, None, task.input_size))
#
if args.controller == 'lstm':
    controller = LSTMCell(args.controller_size)
elif args.controller == 'multilstm':
    controller = tf.nn.rnn_cell.MultiRNNCell(
        [LSTMCell(args.controller_size) for i in range(3)])
elif args.controller == 'ff':
    controller = dnc.ff.FFWrapper(
        dnc.ff.simple_feedforward(hidden=[args.controller_size] * 2))

if not args.no_dnc:
    net = DNC(input,
              memory,
              output_size=task.output_size,
              controller=controller,
              log_memory=True)
    output = net[0]
else:
    output, _ = tf.nn.dynamic_rnn(controller, input, dtype=tf.float32)
    output = tf.layers.dense(output, task.output_size, use_bias=False)

targets = tf.placeholder(dtype=tf.float32,
                         shape=[None, None, task.output_size])
mask = tf.placeholder(dtype=tf.float32, shape=[None, None, task.output_size])
if not args.no_mask:
    loss = tf.losses.sigmoid_cross_entropy(logits=output,
                                           weights=mask,
                                           multi_class_labels=targets)
    cost = tf.reduce_sum(mask * (1 - targets * (1 - tf.exp(-output))) *
Example #14
file = open('trainset', 'rb')
trainset = pickle.load(file)
file.close()

file = open('trainlabels', 'rb')
trainlabels = pickle.load(file)
file.close()

train_data_loader = T.utils.data.DataLoader(dataset=trainset,
                                            batch_size=1,
                                            shuffle=False)
trainset = iter(train_data_loader)

print('Defining model...')
diffy = DNC(25, 128, num_layers=2, independent_linears=True)

loss_fn = T.nn.MSELoss()

optimizer = T.optim.Adam(diffy.parameters(), lr=0.0001, betas=[0.9, 0.98])

maxVal = 0

maxItem = []

print('Finding max...')
for item in trainset:
    if maxVal < len(item):
        maxVal = len(item)
        maxItem = item
Example #16
np.random.seed(1)

g = tf.Graph()

with g.as_default():
    batch_size = 4
    output_size = 20
    input_size = 10
    memory_config = {'words_num': 256, 'word_size': 64, 'read_heads_num': 4}
    controller_config = {
        "hidden_size": 128,
    }

    dnc = DNC(
        controller_config,
        memory_config,
        output_size,
    )
    initial_state = dnc.initial_state(batch_size)
    example_input = np.random.uniform(0, 1, (batch_size, input_size)).astype(
        np.float32)
    output_op, _ = dnc(
        tf.convert_to_tensor(example_input),
        initial_state,
    )
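    # This runs a single DNC step on one batch of input vectors; for full
    # sequences the cell would typically be wrapped in tf.nn.dynamic_rnn.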
    init = tf.global_variables_initializer()
    with tf.Session(graph=g) as sess:
        init.run()
        example_output = sess.run(output_op)

    tf.summary.FileWriter("graphs", g).close()
Example #17
  mem_slot = args.mem_slot
  mem_size = args.mem_size
  read_heads = args.read_heads


  # options, _ = getopt.getopt(sys.argv[1:], '', ['iterations='])

  # for opt in options:
  #   if opt[0] == '-iterations':
  #     iterations = int(opt[1])

  rnn = DNC(
    input_size=args.input_size,
    hidden_size=args.nhid,
    rnn_type='lstm',
    num_layers=args.nlayer,
    nr_cells=mem_slot,
    cell_size=mem_size,
    read_heads=read_heads,
    gpu_id=args.cuda
  )

  if args.cuda != -1:
    rnn = rnn.cuda(args.cuda)

  last_save_losses = []

  optimizer = optim.Adam(rnn.parameters(), lr=args.lr)

  for epoch in range(iterations + 1):
    llprint("\rIteration {ep}/{tot}".format(ep=epoch, tot=iterations))
    optimizer.zero_grad()
Example #18
        parser = argparse.ArgumentParser()
        parser.add_argument("task", type=str)
        parser.add_argument("datadir", type=str, help="Where do you want your datasets to be?")
        parser.add_argument("--summary-dir", type=str, help="Summary directory for tensorboard", default=None)
        args = parser.parse_args()

        args.task = args.task.lower()
        if args.task not in SUPPORTED_TASKS:
            print("Unsupported task: {}".format(args.task))
            sys.exit(1)

        if args.task == 'babi':
            print("== BABI ==")
            download_babi(args.datadir)
            X_train, X_test, y_train, y_test = load_babi(args.datadir, lesson=1)
        elif args.task == "copy":
            print("== COPY ==")
            X_train, X_test, y_train, y_test = make_copy_dataset(args.datadir)

        #X_train = X_train[:, -2:, :] # for debugging
        #X_test = X_test[:, -2:, :] # for debugging
        print("== DNC ==")
        print()
        machine = DNC(X_train, y_train, X_test, y_test,
                        summary_dir=args.summary_dir, N=X_train.shape[1], W=10, R=3,
                        #checkpoint_file="{}.ckpt".format(args.task),
                        optimizer="RMSProp")
        print("== Training ==")
        print()
        machine.train()
Example #19
import torch as T
from dnc import DNC
import torchvision as tv

train = tv.datasets.MNIST('.', train=True, transform=tv.transforms.ToTensor())
test = tv.datasets.MNIST('.', train=False, transform=tv.transforms.ToTensor())

batch_size = 1

train_data_loader = T.utils.data.DataLoader(dataset=train,
                                            batch_size=batch_size,
                                            shuffle=True)
trainset = iter(train_data_loader)
trainsize = len(train_data_loader)

diffy = DNC(28, 128, num_layers=1, independent_linears=True)

loss_fn = T.nn.MSELoss()

optimizer = T.optim.Adam(diffy.parameters(),
                         lr=0.0001,
                         eps=1e-9,
                         betas=[0.9, 0.98])

(controller_hidden, memory, read_vectors) = (None, None, None)
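# Passing (None, None, None) as the state lets the DNC initialize controller
# hidden state, memory, and read vectors on the first call.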

ranges = 2 * trainsize

for it in range(ranges):
    optimizer.zero_grad()
    img, true_out = next(trainset)
Example #20
# helper funcs
def binary_cross_entropy(y_hat, y):
    return tf.reduce_mean(-y * tf.log(y_hat) - (1 - y) * tf.log(1 - y_hat))


def llprint(message):
    sys.stdout.write(message)
    sys.stdout.flush()


# build graph
sess = tf.InteractiveSession()

llprint("building graph...\n")
optimizer = tf.train.RMSPropOptimizer(FLAGS.lr, momentum=FLAGS.momentum)
dnc = DNC(RNNController, FLAGS, input_steps=FLAGS.ascii_steps)

llprint("defining loss...\n")
y_hat, outputs = dnc.get_outputs()
# TODO: fix this loss: l2 on [:,:,:2] and then do binary cross entropy on <EOS> tags
loss = tf.nn.l2_loss(dnc.y -
                     y_hat) * 100. / (FLAGS.batch_size *
                                      (FLAGS.ascii_steps + FLAGS.stroke_steps))

llprint("computing gradients...\n")
gradients = optimizer.compute_gradients(loss)
for i, (grad, var) in enumerate(gradients):
    if grad is not None:
        gradients[i] = (tf.clip_by_value(grad, -10, 10), var)

grad_op = optimizer.apply_gradients(gradients)
Example #21
    summarize_freq = args.summarize_freq
    check_freq = args.check_freq

    # input_size = output_size = args.input_size
    mem_slot = args.mem_slot
    mem_size = args.mem_size
    read_heads = args.read_heads

    if args.memory_type == 'dnc':
        rnn = DNC(input_size=args.input_size,
                  hidden_size=args.nhid,
                  rnn_type=args.rnn_type,
                  num_layers=args.nlayer,
                  num_hidden_layers=args.nhlayer,
                  dropout=args.dropout,
                  nr_cells=mem_slot,
                  cell_size=mem_size,
                  read_heads=read_heads,
                  gpu_id=args.cuda,
                  debug=args.visdom,
                  batch_first=True,
                  independent_linears=True)
    elif args.memory_type == 'sdnc':
        rnn = SDNC(input_size=args.input_size,
                   hidden_size=args.nhid,
                   rnn_type=args.rnn_type,
                   num_layers=args.nlayer,
                   num_hidden_layers=args.nhlayer,
                   dropout=args.dropout,
                   nr_cells=mem_slot,
                   cell_size=mem_size,
Example #22
For the Associative task, input_size: seq_width + 2, output_size: seq_width
For the NGram task, input_size: 1, output_size: 1
For the Priority Sort task, input_size: seq_width + 1, output_size: seq_width
"""
has_tau = 0
if args.model == 'ntm':
    model = NTM(input_size=input_size,
          output_size=output_size,
          controller_size=args.lstm_size,
          memory_units=128,
          memory_unit_size=20,
          num_heads=1)  # task_params['num_heads']
elif args.model == 'dnc':
    model = DNC(input_size=input_size,
          output_size=output_size,
          hidden_size=args.lstm_size,
          nr_cells=128,
          cell_size=20,
          read_heads=1)  # task_params['num_heads']
    model.init_param()
elif args.model == 'sam':
    model = SAM(input_size=input_size,
          output_size=output_size,
          hidden_size=args.lstm_size,
          nr_cells=128,
          cell_size=20,
          read_heads=1)  # read_heads=4? task_params['num_heads']
    model.init_param()
elif args.model == 'lstm':
    marnn_config = args
    print('marnn_config:\n', marnn_config)
    model = MARNN(marnn_config, input_size=input_size,
Example #23
    def _build_model(self):
        if args.mann == 'none':

            def single_cell(num_units):
                return tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1.0)

            cell = tf.contrib.rnn.OutputProjectionWrapper(
                tf.contrib.rnn.MultiRNNCell([
                    single_cell(args.num_units) for _ in range(args.num_layers)
                ]),
                args.num_bits_per_vector,
                activation=None)

            initial_state = tuple(
                tf.contrib.rnn.LSTMStateTuple(
                    c=expand(tf.tanh(learned_init(args.num_units)),
                             dim=0,
                             N=args.batch_size),
                    h=expand(tf.tanh(learned_init(args.num_units)),
                             dim=0,
                             N=args.batch_size))
                for _ in range(args.num_layers))

        elif args.mann == 'ntm':
            cell = NTMCell(args.num_layers,
                           args.num_units,
                           args.num_memory_locations,
                           args.memory_size,
                           args.num_read_heads,
                           args.num_write_heads,
                           addressing_mode='content_and_location',
                           shift_range=args.conv_shift_range,
                           reuse=False,
                           output_dim=args.num_bits_per_vector,
                           clip_value=args.clip_value,
                           init_mode=args.init_mode)

            initial_state = cell.zero_state(args.batch_size, tf.float32)
        elif args.mann == 'dnc':
            access_config = {
                'memory_size': args.num_memory_locations,
                'word_size': args.memory_size,
                'num_reads': args.num_read_heads,
                'num_writes': args.num_write_heads,
            }
            controller_config = {
                'hidden_size': args.num_units,
            }

            cell = DNC(access_config, controller_config,
                       args.num_bits_per_vector, args.clip_value)
            initial_state = cell.initial_state(args.batch_size)

        output_sequence, _ = tf.nn.dynamic_rnn(cell=cell,
                                               inputs=self.inputs,
                                               time_major=False,
                                               initial_state=initial_state)

        if args.task == 'copy' or args.task == 'repeat_copy':
            self.output_logits = output_sequence[:, self.max_seq_len + 1:, :]
        elif args.task == 'associative_recall':
            self.output_logits = output_sequence[:, 3 * (self.max_seq_len + 1) + 2:, :]
        elif args.task in ('traversal', 'shortest_path'):
            self.output_logits = output_sequence[:, -self.max_seq_len:, :]

        if args.task in ('copy', 'repeat_copy', 'associative_recall'):
            self.outputs = tf.sigmoid(self.output_logits)

        if args.task in ('traversal', 'shortest_path'):
            output_logits_split = tf.split(self.output_logits, 9, axis=2)
            self.outputs = tf.concat(
                [tf.nn.softmax(logits) for logits in output_logits_split],
                axis=2)
Example #24
def model_fn(features, labels, mode, params):

    # --------------------------------------------------------------------------
    # Model
    # --------------------------------------------------------------------------

    access_config = {
        "memory_size": 16,
        "word_size": 16,
        "num_reads": 4,
        "num_writes": 1,
    }

    controller_config = {
        "hidden_size": 64,
    }

    clip_value = 20

    dnc_core = DNC(access_config, controller_config, 5, clip_value)
    initial_state = dnc_core.initial_state(params["batch_size"])
    output_logits, _ = tf.nn.dynamic_rnn(cell=dnc_core,
                                         inputs=features,
                                         time_major=True,
                                         initial_state=initial_state)
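    # With time_major=True, features and output_logits are laid out as
    # [time, batch, depth].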

    # --------------------------------------------------------------------------
    # Build EstimatorSpec
    # --------------------------------------------------------------------------

    train_loss = params["dataset_" + mode].cost(output_logits,
                                                labels["target"],
                                                labels["mask"])

    # Set up optimizer with global norm clipping.
    trainable_variables = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(train_loss, trainable_variables), params["max_grad_norm"])

    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.int64,
        initializer=tf.zeros_initializer(),
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_VARIABLES, tf.GraphKeys.GLOBAL_STEP])

    optimizer = tf.train.RMSPropOptimizer(params["lr"],
                                          epsilon=params["optimizer_epsilon"])

    train_step = optimizer.apply_gradients(zip(grads, trainable_variables),
                                           global_step=global_step)

    # dataset_tensors_np, output_np = sess.run([dataset_tensors, output])
    # dataset_string = dataset.to_human_readable(dataset_tensors_np, output_np)

    output_sigmoid = tf.nn.sigmoid(output_logits)
    delta = tf.abs(output_sigmoid - labels["target"])
    tf.summary.histogram("delta", delta)
    equality = tf.cast(delta < 0.1, tf.float32) * tf.expand_dims(
        labels["mask"], -1)

    correct_elements = tf.reduce_mean(tf.reduce_sum(equality, [0, 2]))
    pct_correct = tf.reduce_mean(
        tf.reduce_sum(equality, [0, 2]) /
        tf.cast(labels["total_targ_batch"], tf.float32))

    eval_metric_ops = {
        "accuracy": tf.metrics.mean(pct_correct),
        "loss": tf.metrics.mean(train_loss),
        "correct_elements": tf.metrics.mean(correct_elements),
        "total_elements": tf.metrics.mean(tf.cast(labels["total_targ_batch"], tf.float32))
    }

    image_mask = tf.expand_dims(tf.expand_dims(labels["mask"], -1), -1)

    xent = tf.expand_dims(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels["target"],
            logits=output_logits * tf.expand_dims(labels["mask"], -1)), -1)

    image = tf.concat(
        [
            # tf.expand_dims(output_logits, -1),
            output_sigmoid,
            labels["target"],
            # tf.expand_dims(equality, -1),
            # xent / tf.reduce_max(xent)
        ],
        -1)
    # tf summary image expects shape [batch_size, height, width, channels]
    image = tf.transpose(image, perm=[1, 0, 2])
    tf.summary.image("output_compare", tf.expand_dims(image, -1), 4)

    tf.summary.scalar("train_loss", tf.reduce_mean(train_loss))
    tf.summary.scalar("train_accuracy", pct_correct)
    tf.summary.scalar("correct_elements", correct_elements)
    tf.summary.scalar("total_elements",
                      tf.reduce_mean(labels["total_targ_batch"], axis=-1))

    tf.summary.scalar(
        "max_length",
        tf.convert_to_tensor(params["dataset_" + mode]._max_length))
    tf.summary.scalar(
        "max_repeats",
        tf.convert_to_tensor(params["dataset_" + mode]._max_repeats))

    return tf.estimator.EstimatorSpec(mode,
                                      loss=train_loss,
                                      train_op=train_step,
                                      eval_metric_ops=eval_metric_ops,
                                      scaffold=gen_scaffold(params))