Example #1
0
def main():
    """Build a DNC model sized to the lexicon and start training it on the GPU.

    Loads the pickled lexicon dictionary from ``data/processed`` next to this
    file, uses its length as both the input width (``x``) and the ``v_t``
    argument of the DNC, moves the model to CUDA, picks an optimizer and
    hands everything to ``train``.
    """
    # Training hyper-parameters.
    story_limit = 150
    epoch_batches_count = 64
    epochs_count = 1024
    lr = 1e-11
    # Non-None placeholder: selects the Adadelta branch below. Presumably
    # replaced by a saved optimizer when load_model is enabled -- TODO confirm.
    optim = 1
    # -1 so that the "+ 1" in the train call starts a fresh run at epoch 0.
    starting_epoch = -1
    bs = 32
    pgd = PreGenData(bs)

    task_dir = os.path.dirname(abspath(__file__))
    processed_data_dir = join(task_dir, 'data', "processed")
    lexicon_path = join(processed_data_dir, 'lexicon-dict.pkl')
    # Use a context manager so the file handle is closed deterministically.
    # NOTE(review): pickle executes arbitrary code on load; this assumes the
    # lexicon file is a trusted, locally generated artifact.
    with open(lexicon_path, 'rb') as lexicon_file:
        lexicon_dictionary = pickle.load(lexicon_file)
    x = len(lexicon_dictionary)

    computer = DNC(x=x, v_t=x, bs=bs, W=64, L=64, R=32, h=256)

    # if load model
    # computer, optim, starting_epoch = load_model(computer)

    computer = computer.cuda()
    if optim is None:
        optimizer = torch.optim.Adam(computer.parameters(), lr=lr)
    else:
        print('use Adadelta optimizer with learning rate ', lr)
        optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)

    # starting with the epoch after the loaded one
    train(computer, optimizer, story_limit, bs, pgd, x,
          int(starting_epoch) + 1, epochs_count, epoch_batches_count)
Example #2
0
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(rnn.last_layer.parameters(),
                                  lr=args.lr,
                                  momentum=0.9,
                                  eps=1e-10)  # 0.0001
    elif args.optim == 'sgd':
        optimizer = optim.SGD(rnn.last_layer.parameters(), lr=args.lr)  # 0.01
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(rnn.last_layer.parameters(), lr=args.lr)
    elif args.optim == 'adadelta':
        optimizer = optim.Adadelta(rnn.last_layer.parameters(), lr=args.lr)

    debug_enabled = hasattr(rnn, 'debug') and rnn.debug

    if args.cuda != -1:
        rnn = rnn.cuda(args.cuda)

    (chx, mhx, rv) = (None, None, None)
    for epoch in range(iterations + 1):
        llprint("\rIteration {ep}/{tot}".format(ep=epoch, tot=iterations))
        optimizer.zero_grad()

        random_length = np.random.randint(1, sequence_max_length + 1)

        input_data, target_output = generate_data(batch_size, random_length,
                                                  args.input_size, args.cuda)

        if debug_enabled:
            output, (chx, mhx, rv), v = rnn(input_data, (None, mhx, None),
                                            reset_experience=True,
                                            pass_through_memory=True)