Esempio n. 1
0
def main():
    """Build a DNC model sized to the lexicon dictionary and launch training.

    Hyper-parameters are hard-coded below; resuming from a checkpoint is the
    commented-out ``load_model`` call (which would replace ``optim`` and
    ``starting_epoch``).
    """
    story_limit = 150
    epoch_batches_count = 64
    epochs_count = 1024
    lr = 1e-11
    optim = 1  # non-None sentinel: selects the Adadelta branch below
    starting_epoch = -1
    bs = 32
    pgd = PreGenData(bs)

    task_dir = os.path.dirname(abspath(__file__))
    processed_data_dir = join(task_dir, 'data', "processed")
    # Context manager so the pickle file handle is closed even on error
    # (the original left the file open).
    with open(join(processed_data_dir, 'lexicon-dict.pkl'), 'rb') as f:
        lexicon_dictionary = pickle.load(f)
    # Vocabulary size drives both input and output width of the model.
    x = len(lexicon_dictionary)

    computer = DNC(x=x, v_t=x, bs=bs, W=64, L=64, R=32, h=256)

    # if load model
    # computer, optim, starting_epoch = load_model(computer)

    computer = computer.cuda()
    if optim is None:
        optimizer = torch.optim.Adam(computer.parameters(), lr=lr)
    else:
        print('use Adadelta optimizer with learning rate ', lr)
        optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)

    # starting with the epoch after the loaded one
    train(computer, optimizer, story_limit, bs, pgd, x,
          int(starting_epoch) + 1, epochs_count, epoch_batches_count)
Esempio n. 2
0
def test_rnn_no_memory_pass():
    """GRU-based DNC forward/backward with the external memory bypassed.

    With ``pass_through_memory=False`` the read vectors ``rv`` must stay
    ``None`` while controller hidden state and the (unused) memory keep
    their expected shapes.
    """
    T.manual_seed(1111)  # deterministic weights for reproducible shapes/values

    input_size = 100
    hidden_size = 100
    rnn_type = 'gru'
    num_layers = 3
    num_hidden_layers = 5
    dropout = 0.2
    nr_cells = 12
    cell_size = 17
    read_heads = 3
    gpu_id = -1  # -1 => CPU
    debug = True
    lr = 0.001
    batch_size = 10
    cuda = gpu_id
    clip = 20
    length = 13

    rnn = DNC(input_size=input_size,
              hidden_size=hidden_size,
              rnn_type=rnn_type,
              num_layers=num_layers,
              num_hidden_layers=num_hidden_layers,
              dropout=dropout,
              nr_cells=nr_cells,
              cell_size=cell_size,
              read_heads=read_heads,
              gpu_id=gpu_id,
              debug=debug)

    optimizer = optim.Adam(rnn.parameters(), lr=lr)
    optimizer.zero_grad()

    input_data, target_output = generate_data(batch_size, length, input_size,
                                              cuda)
    target_output = target_output.transpose(0, 1).contiguous()

    (chx, mhx, rv) = (None, None, None)
    outputs = []
    # Run several passes threading the hidden state through; memory is
    # bypassed on every step.
    for _ in range(6):
        output, (chx, mhx, rv), v = rnn(input_data, (chx, mhx, rv),
                                        pass_through_memory=False)
        output = output.transpose(0, 1)
        outputs.append(output)

    # Sum the per-pass outputs so the loss depends on all six passes.
    output = functools.reduce(lambda acc, o: acc + o, outputs)
    loss = criterion(output, target_output)
    loss.backward()

    # clip_grad_norm is deprecated/removed in modern PyTorch; use the
    # in-place variant (available since 0.4).
    T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
    optimizer.step()

    assert target_output.size() == T.Size([27, 10, 100])
    assert chx[0].size() == T.Size([num_hidden_layers, 10, 100])
    assert mhx['memory'].size() == T.Size([10, 12, 17])
    # Identity check, not equality: rv must literally be None here.
    assert rv is None
Esempio n. 3
0
def test_rnn_n():
    """One forward/backward pass through a vanilla-RNN DNC with memory on.

    Checks target/hidden/memory shapes and that the returned read vectors
    have width read_heads * cell_size (3 * 17 = 51) per batch element.
    """
    T.manual_seed(1111)  # deterministic weights for reproducible shapes/values

    input_size = 100
    hidden_size = 100
    rnn_type = 'rnn'
    num_layers = 3
    num_hidden_layers = 5
    dropout = 0.2
    nr_cells = 12
    cell_size = 17
    read_heads = 3
    gpu_id = -1  # -1 => CPU
    debug = True
    lr = 0.001
    batch_size = 10
    cuda = gpu_id
    clip = 20
    length = 13

    rnn = DNC(
        input_size=input_size,
        hidden_size=hidden_size,
        rnn_type=rnn_type,
        num_layers=num_layers,
        num_hidden_layers=num_hidden_layers,
        dropout=dropout,
        nr_cells=nr_cells,
        cell_size=cell_size,
        read_heads=read_heads,
        gpu_id=gpu_id,
        debug=debug
    )

    optimizer = optim.Adam(rnn.parameters(), lr=lr)
    optimizer.zero_grad()

    input_data, target_output = generate_data(batch_size, length, input_size, cuda)
    target_output = target_output.transpose(0, 1).contiguous()

    # No prior state (None): the model initializes controller and memory.
    output, (chx, mhx, rv), v = rnn(input_data, None)
    output = output.transpose(0, 1)

    loss = criterion(output, target_output)
    loss.backward()

    # clip_grad_norm is deprecated/removed in modern PyTorch; use the
    # in-place variant (available since 0.4).
    T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
    optimizer.step()

    assert target_output.size() == T.Size([27, 10, 100])
    assert chx[1].size() == T.Size([num_hidden_layers, 10, 100])
    assert mhx['memory'].size() == T.Size([10, 12, 17])
    assert rv.size() == T.Size([10, 51])
Esempio n. 4
0
            # NOTE(review): the branch condition is outside this chunk; this
            # arm unpacks an extra third return value `v` (debug output).
            output, (chx, mhx, rv), v = rnn(input_data, (None, mhx, None),
                                            reset_experience=True,
                                            pass_through_memory=True)
        else:
            # Same call, but without the extra debug output.
            output, (chx, mhx, rv) = rnn(input_data, (None, mhx, None),
                                         reset_experience=True,
                                         pass_through_memory=True)

        loss = criterion((output), target_output)

        # 'ldni' models get the task loss handed in explicitly — presumably
        # to train their synthetic-gradient modules; confirm with the model.
        if args.optim_type == 'ldni':
            rnn.register_loss(loss)

        loss.backward()

        # NOTE(review): clip_grad_norm is deprecated in modern PyTorch;
        # prefer the in-place clip_grad_norm_.
        T.nn.utils.clip_grad_norm(rnn.parameters(), args.clip)
        # Skip the explicit step for 'dni' — presumably the DNI wrapper
        # applies the updates itself; verify against the wrapper.
        if args.optim_type != 'dni':
            optimizer.step()
        # NOTE(review): loss.data[0] only works on pre-0.4 PyTorch; newer
        # versions require loss.item().
        loss_value = loss.data[0]

        # Periodic bookkeeping flags for this epoch.
        summarize = (epoch % summarize_freq == 0)
        take_checkpoint = (epoch != 0) and (epoch % check_freq == 0)
        increment_curriculum = (epoch != 0) and (epoch % args.curriculum_freq
                                                 == 0)

        # detach memory from graph — tensors are detached so the next
        # iteration does not backprop through this one; non-tensor values
        # pass through unchanged.
        if mhx is not None:
            mhx = {
                k: (v.detach() if isinstance(v, var) else v)
                for k, v in mhx.items()
            }
Esempio n. 5
0
                  independent_linears=False)
    # Plain LSTM baseline without external memory.
    elif args.memory_type == 'lstm':
        rnn = LSTMModel(args.input_size,
                        args.nhid,
                        num_layers=args.nhlayer,
                        dropout=args.dropout,
                        batch_first=True)
    else:
        raise Exception('Not recognized type of memory')

    # register_nan_checks(rnn)

    # By its name: losses accumulated since the last save/summary (usage is
    # outside this chunk).
    last_save_losses = []

    # Map the CLI optimizer flag to a torch.optim constructor; the trailing
    # numeric comments look like previously-used learning rates (unverified).
    if args.optim == 'adam':
        optimizer = optim.Adam(rnn.parameters(),
                               lr=args.lr,
                               eps=1e-9,
                               betas=[0.9, 0.98])  # 0.0001
    elif args.optim == 'adamax':
        optimizer = optim.Adamax(rnn.parameters(),
                                 lr=args.lr,
                                 eps=1e-9,
                                 betas=[0.9, 0.98])  # 0.0001
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(rnn.parameters(),
                                  lr=args.lr,
                                  momentum=0.9,
                                  eps=1e-10)  # 0.0001
    elif args.optim == 'sgd':
        optimizer = optim.SGD(rnn.parameters(), lr=args.lr)  # 0.01
Esempio n. 6
0
# Load MNIST. NOTE(review): torchvision's MNIST defaults to train=True, so
# `test` below appears to load the training split too — confirm intent.
train = tv.datasets.MNIST('.', train=True, transform=tv.transforms.ToTensor())
test = tv.datasets.MNIST('.', transform=tv.transforms.ToTensor())

batch_size = 1

train_data_loader = T.utils.data.DataLoader(dataset=train,
                                            batch_size=batch_size,
                                            shuffle=True)
trainset = iter(train_data_loader)
trainsize = len(train_data_loader)

# DNC with input size 28 — presumably one 28-pixel image row per time
# step; confirm against the model's expected input layout.
diffy = DNC(28, 128, num_layers=1, independent_linears=True)

loss_fn = T.nn.MSELoss()

optimizer = T.optim.Adam(diffy.parameters(),
                         lr=0.0001,
                         eps=1e-9,
                         betas=[0.9, 0.98])

# DNC state carried across iterations: controller state and read vectors
# are re-initialized each step (None passed below); memory is carried over.
(controller_hidden, memory, read_vectors) = (None, None, None)

ranges = 2 * trainsize

# NOTE(review): `trainset` is a one-shot iterator but the loop runs
# 2 * trainsize iterations — next(trainset) will raise StopIteration on
# the second pass unless the iterator is re-created.
for it in range(ranges):
    optimizer.zero_grad()
    img, true_out = next(trainset)
    img = T.squeeze(img, 1)  # drop dim 1 — assumes (B, 1, 28, 28) input; verify

    output, (controller_hidden, memory,
             read_vectors) = diffy(img, (None, memory, None),
Esempio n. 7
0
        sparse_reads=args.sparse_reads,
        read_heads=args.read_heads,
        gpu_id=args.cuda,
        debug=args.visdom,
        batch_first=True,
        independent_linears=False
    )
  else:
    raise Exception('Not recognized type of memory')

  print(rnn)

  # By its name: losses accumulated since the last save (usage outside
  # this chunk).
  last_save_losses = []

  # Map the CLI optimizer flag to a torch.optim constructor; the trailing
  # numeric comments look like previously-used learning rates (unverified).
  if args.optim == 'adam':
    optimizer = optim.Adam(rnn.parameters(), lr=args.lr, eps=1e-9, betas=[0.9, 0.98])  # 0.0001
  elif args.optim == 'adamax':
    optimizer = optim.Adamax(rnn.parameters(), lr=args.lr, eps=1e-9, betas=[0.9, 0.98])  # 0.0001
  elif args.optim == 'rmsprop':
    optimizer = optim.RMSprop(rnn.parameters(), lr=args.lr, momentum=0.9, eps=1e-10)  # 0.0001
  elif args.optim == 'sgd':
    optimizer = optim.SGD(rnn.parameters(), lr=args.lr)  # 0.01
  elif args.optim == 'adagrad':
    optimizer = optim.Adagrad(rnn.parameters(), lr=args.lr)
  elif args.optim == 'adadelta':
    optimizer = optim.Adadelta(rnn.parameters(), lr=args.lr)

  # Capture the underlying model's debug flag before wrapping, since the
  # wrapper replaces the top-level module.
  debug_enabled = rnn.debug
  # Wrap the model in DNI, passing the optimizer in — presumably the
  # wrapper drives parameter updates itself; confirm against DNI's API.
  rnn = DNI(rnn, hidden_size=args.nhid, optim=optimizer)

  if args.cuda != -1:
Esempio n. 8
0
    input_size=args.input_size,
    hidden_size=args.nhid,
    rnn_type='lstm',
    num_layers=args.nlayer,
    nr_cells=mem_slot,
    cell_size=mem_size,
    read_heads=read_heads,
    gpu_id=args.cuda
  )

  # args.cuda == -1 means CPU; otherwise it is the CUDA device index.
  if args.cuda != -1:
    rnn = rnn.cuda(args.cuda)

  last_save_losses = []

  optimizer = optim.Adam(rnn.parameters(), lr=args.lr)

  # Training loop: each iteration draws a fresh random-length batch.
  for epoch in range(iterations + 1):
    llprint("\rIteration {ep}/{tot}".format(ep=epoch, tot=iterations))
    optimizer.zero_grad()

    # Random sequence length in [1, sequence_max_length] (np.random.randint
    # excludes the upper bound, hence the +1).
    random_length = np.random.randint(1, sequence_max_length + 1)

    input_data, target_output = generate_data(batch_size, random_length, args.input_size, args.cuda)
    # input_data = input_data.transpose(0, 1).contiguous()
    target_output = target_output.transpose(0, 1).contiguous()

    # No prior state (None). Output is transposed — presumably to match the
    # transposed target layout above; confirm the model's batch dimension.
    output, _ = rnn(input_data, None)
    output = output.transpose(0, 1)

    loss = criterion((output), target_output)
Esempio n. 9
0
# Load pickled training labels from disk.
file = open('trainlabels', 'rb')
trainlables = pickle.load(file)  # NOTE(review): "trainlables" looks like a typo for "trainlabels"
file.close()

train_data_loader = T.utils.data.DataLoader(dataset=trainset,
                                            batch_size=1,
                                            shuffle=False)
# NOTE(review): `trainset` is rebound to a one-shot iterator here, but the
# loops below treat it as a sized, indexable sequence — len(trainset) and
# trainset[i] on an iterator raise TypeError, and the max-finding loop
# exhausts it. This section appears broken as written.
trainset = iter(train_data_loader)

print('Defining model...')
diffy = DNC(25, 128, num_layers=2, independent_linears=True)

loss_fn = T.nn.MSELoss()

optimizer = T.optim.Adam(diffy.parameters(), lr=0.0001, betas=[0.9, 0.98])

# Longest item length seen so far, and the item that achieved it.
maxVal = 0

maxItem = []

# Scan for the longest item so shorter ones can be padded to its length.
print('Finding max...')
for item in trainset:  # exhausts the iterator (see NOTE above)
    if maxVal < len(item):
        maxVal = len(item)
        maxItem = item

print('Padding values...')
for i in range(len(trainset)):      # NOTE(review): len() of an iterator fails
    if len(trainset[i]) < maxVal:   # NOTE(review): iterators are not indexable
        while len(trainset[i]) < maxVal: