Example 1
def main(args):

    # Read configs
    with open(args.cfg_path, "r") as fp:
        configs = json.load(fp)

    # Update the configs based on command line args
    arg_dict = vars(args)
    for key in arg_dict:
        if key in configs:
            if arg_dict[key] is not None:
                configs[key] = arg_dict[key]
    
    configs = utils.ConfigMapper(configs)

    configs.attack_eps = float(configs.attack_eps)
    configs.attack_lr = float(configs.attack_lr)

    print("configs mode: ", configs.mode)
    print("configs lr: ", configs.lr)
    print("configs size: ", configs.size)

    configs.save_path = os.path.join(configs.save_path, configs.mode)
    experiment_name = exp_name(configs)
    configs.save_path = os.path.join(configs.save_path, experiment_name)
    pathlib.Path(configs.save_path).mkdir(parents=True, exist_ok=True)

    trainer = Trainer(configs)
    trainer.train()

    print("training is over!!!")
Example 2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-rate', '-lr', type=float, default=1e-3)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--data-parallel', action='store_true')
    parser.add_argument('--num-d-iterations', type=int, default=1)
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    print(args)

    device = torch.device('cuda' if args.cuda else 'cpu')

    net_g = Generator(ch=128).to(device)
    net_d = Discriminator(ch=128).to(device)

    optim_g = optim.Adam(
        net_g.parameters(), lr=args.learning_rate, betas=(0.5, 0.999))
    optim_d = optim.Adam(
        net_d.parameters(), lr=args.learning_rate, betas=(0.5, 0.999))

    dataloader = get_cat_dataloader()

    trainer = Trainer(net_g, net_d, optim_g, optim_d, dataloader, device,
                      args.num_d_iterations)

    os.makedirs('samples', exist_ok=True)

    trainer.train(args.epochs)
Example 3
def main():
    create_logging()
    sess = tf.Session()
    dl = IMDBDataLoader(sess)
    model = Model(dl)
    trainer = Trainer(sess, model, dl)
    trainer.train()
Example 4
def main(args):

    # Read configs
    with open(args.cfg_path, "r") as fp:
        configs = json.load(fp)

    # Update the configs based on command line args
    arg_dict = vars(args)
    for key in arg_dict:
        if key in configs:
            if arg_dict[key] is not None:
                configs[key] = arg_dict[key]
    configs = utils.ConfigMapper(configs)

    configs.attack_eps = float(configs.attack_eps) / 255
    configs.attack_lr = float(configs.attack_lr) / 255

    configs.save_path = os.path.join(configs.save_path, configs.mode,
                                     configs.alg)
    pathlib.Path(configs.save_path).mkdir(parents=True, exist_ok=True)

    if configs.mode == 'train':
        trainer = Trainer(configs)
        trainer.train()
    elif configs.mode == 'eval':
        evaluator = Evaluator(configs)
        evaluator.eval()
    elif configs.mode == 'vis':
        visualizer = Visualizer(configs)
        visualizer.visualize()
    else:
        raise ValueError('mode should be train, eval or vis')
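
The /255 in Example 4 converts an attack budget given in 8-bit pixel units into the [0, 1] range of the normalized images. A worked illustration (the values 8 and 2 are only the common CIFAR-10 defaults, not taken from this project):

eps_pixels, lr_pixels = 8.0, 2.0
attack_eps = eps_pixels / 255   # ~0.0314, i.e. the usual "8/255" budget
attack_lr = lr_pixels / 255     # ~0.0078 per attack step
print("eps=%.4f lr=%.4f" % (attack_eps, attack_lr))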
Example 5
def main(args):

    # Read configs
    with open(args.cfg_path, "r") as fp:
        configs = json.load(fp)

    # Update the configs based on command line args
    arg_dict = vars(args)
    for key in arg_dict:
        if key in configs:
            if arg_dict[key] is not None:
                configs[key] = arg_dict[key]

    configs = utils.ConfigMapper(configs)

    configs.attack_eps = float(configs.attack_eps) / 255
    configs.attack_lr = float(configs.attack_lr) / 255

    print("configs mode: ", configs.mode)
    print("configs lr: ", configs.lr)

    configs.save_path = os.path.join(configs.save_path, configs.mode)
    experiment_name = exp_name(configs)
    configs.save_path = os.path.join(configs.save_path, experiment_name)
    pathlib.Path(configs.save_path).mkdir(parents=True, exist_ok=True)

    # settings
    experiment_name = "resnet18_Adversarial_Training_margin" + '_lr_' + str(
        configs.lr) + '_alpha_' + str(configs.alpha) + '_seed_' + str(
            configs.seed) + '_epsilon_' + str(configs.attack_eps)

    trainer = Trainer(configs)
    trainer.train()

    print("training is over!!!")
Example 6
def train2():
    global training_data2, n2, t2

    layers = []
    layers.append({
        'type': 'input',
        'out_sx': 28,
        'out_sy': 28,
        'out_depth': 1
    })
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'

    n2 = Net(layers)
    print 'Net made...'
    print n2

    t2 = Trainer(n2, {
        'method': 'adadelta',
        'batch_size': 20,
        'l2_decay': 0.001
    })
    print 'Trainer made...'

    print 'In training of smaller net...'
    print 'k', 'time\t\t  ', 'loss\t  ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data2:
            stats = t2.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
    except:  #hit control-c or other
        return
Example 7
def train2():
    global training_data2, n2, t2

    layers = []
    layers.append({'type': 'input', 'out_sx': 28, 'out_sy': 28, 'out_depth': 1})
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'

    n2 = Net(layers)
    print 'Net made...'
    print n2

    t2 = Trainer(n2, {'method': 'adadelta', 'batch_size': 20, 'l2_decay': 0.001});
    print 'Trainer made...' 

    print 'In training of smaller net...'
    print 'k', 'time\t\t  ', 'loss\t  ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data2: 
            stats = t2.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
    except: #hit control-c or other
        return
Example 8
def run_small_net():
    global training_data2, n2, t2, testing_data

    layers = []
    layers.append({'type': 'input', 'out_sx': 24, 'out_sy': 24, 'out_depth': 1})
    #layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'relu'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'

    n2 = Net(layers)
    print 'Smaller Net made...'
    print n2

    t2 = Trainer(n2, {'method': 'sgd', 'momentum': 0.0})
    print 'Trainer made for smaller net...'

    print 'In training of smaller net...'
    print 'k', 'time\t\t  ', 'loss\t  ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data2: 
            stats = t2.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
    except: #hit control-c or other
        pass

    print 'Testing smaller net: 5000 trials'
    right = 0
    count = 5000
    for x, y in sample(testing_data, count):
        n2.forward(x)
        right += n2.getPrediction() == y
    accuracy = float(right) / count * 100
    print accuracy
Example 9
def main():
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except:
        print("missing or invalid arguments")
        exit(0)

    # create tensorflow session
    tf.reset_default_graph()
    sess = tf.Session(config=tf_config)
    # create instance of the model you want
    model = DLPDE_Model(config)
    model.load(sess)
    # create your data generator
    train_data, test_data, input_train_extra = creat_dataset(config)
    # create tensorboard logger
    logger = Logger(sess, config)
    # create trainer and pass all previous components to it
    trainer = Trainer(sess, model, train_data, test_data, config, logger)

    # here you train your model
    trainer.train()
Example 10
def main(config):
    logger = config.get_logger('train')

    data_loader = config.init_obj('data_loader', module_dataloader)

    torch.hub.set_dir(config['weights_path'])
    model = config.init_obj('arch', module_model)
    logger.info(model)
    # FIXME: refactor needed
    if config['data_loader']['args']['self_supervised']:
        criterion = torch.nn.CrossEntropyLoss()
    else:
        criterion = torch.nn.CrossEntropyLoss(
            weight=data_loader.get_label_proportions().to('cuda'))
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)

    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)

    trainer = Trainer(model.get_model(),
                      criterion,
                      metrics,
                      optimizer,
                      config=config,
                      train_data_loader=data_loader.train,
                      valid_data_loader=data_loader.val,
                      test_data_loader=data_loader.test,
                      lr_scheduler=lr_scheduler)

    trainer.train()
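
Example 10 resembles the widely used pytorch-template project layout, where config.init_obj('arch', module_model) instantiates the class named in the JSON config with its stored kwargs. A sketch of what such a helper typically looks like (an assumption, not this project's code):

def init_obj(self, name, module, *args, **kwargs):
    # e.g. config['optimizer'] == {'type': 'Adam', 'args': {'lr': 1e-3}}
    # -> torch.optim.Adam(trainable_params, lr=1e-3)
    entry = self[name]
    cls_args = dict(entry['args'])
    cls_args.update(kwargs)
    return getattr(module, entry['type'])(*args, **cls_args)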
Example 11
def run_big_net():
    global training_data, testing_data, n, t, training_data2

    training_data = load_data()
    testing_data = load_data(False)
    training_data2 = []

    print 'Data loaded...'

    layers = []
    layers.append({
        'type': 'input',
        'out_sx': 24,
        'out_sy': 24,
        'out_depth': 1
    })
    layers.append({
        'type': 'fc',
        'num_neurons': 100,
        'activation': 'relu',
        'drop_prob': 0.5
    })
    #layers.append({'type': 'fc', 'num_neurons': 800, 'activation': 'relu', 'drop_prob': 0.5})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'

    n = Net(layers)
    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'sgd', 'momentum': 0.0})
    print 'Trainer made...'

    print 'In training...'
    print 'k', 'time\t\t  ', 'loss\t  ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data:
            stats = t.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
            training_data2.append((x, n.getPrediction()))
    except:  #hit control-c or other
        pass

    print 'In testing: 5000 trials'
    right = 0
    count = 5000
    for x, y in sample(testing_data, count):
        n.forward(x)
        right += n.getPrediction() == y
    accuracy = float(right) / count * 100
    print accuracy
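
Examples 11/19 and 8/21 fit together: run_big_net() stores (x, n.getPrediction()) pairs in training_data2, and run_small_net() then fits the smaller net on those predicted labels rather than the ground truth, in effect a simple hard-label distillation setup:

run_big_net()    # trains n and fills training_data2 with the big net's predictions
run_small_net()  # trains n2 on training_data2, then tests it on 5000 held-out samples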
Example 12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='configs/config.json')
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--parallel', action='store_true')
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    print(args)

    device = torch.device('cuda' if args.cuda else 'cpu')

    config = load_json(args.config)

    model = MNISTNet()
    if args.parallel:
        model = nn.DataParallel(model)
    model.to(device)

    optimizer = optim.Adam(model.parameters(), **config['adam'])
    scheduler = optim.lr_scheduler.StepLR(optimizer, **config['steplr'])

    train_loader, valid_loader = mnist_loader(**config['dataset'])

    trainer = Trainer(model, optimizer, train_loader, valid_loader, device)

    output_dir = os.path.join(config['output_dir'],
                              datetime.now().strftime('%Y%m%d_%H%M%S'))
    os.makedirs(output_dir, exist_ok=True)

    # save config to output dir
    save_json(config, os.path.join(output_dir, 'config.json'))

    for epoch in range(config['epochs']):
        scheduler.step()

        train_loss, train_acc = trainer.train()
        valid_loss, valid_acc = trainer.validate()

        print(
            'epoch: {}/{},'.format(epoch + 1, config['epochs']),
            'train loss: {:.4f}, train acc: {:.2f}%,'.format(
                train_loss, train_acc * 100),
            'valid loss: {:.4f}, valid acc: {:.2f}%'.format(
                valid_loss, valid_acc * 100))

        torch.save(
            model.state_dict(),
            os.path.join(output_dir, 'model_{:04d}.pt'.format(epoch + 1)))
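
One caveat about Examples 12 and 20: since PyTorch 1.1 the learning-rate scheduler is meant to be stepped after the epoch's optimizer updates, so calling scheduler.step() at the top of the loop advances the schedule one epoch early. The loop would usually be ordered like this:

for epoch in range(config['epochs']):
    train_loss, train_acc = trainer.train()
    valid_loss, valid_acc = trainer.validate()
    scheduler.step()  # step the LR schedule once the epoch's updates are done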
Example 13
def start():
    global training_data, testing_data, n, t

    training_data = load_data()
    testing_data = load_data(False)

    print 'Data loaded...'

    layers = []
    layers.append({
        'type': 'input',
        'out_sx': 28,
        'out_sy': 28,
        'out_depth': 1
    })
    layers.append({
        'type': 'capsule',
        'num_neurons': 30,
        'num_recog': 3,
        'num_gen': 4,
        'num_pose': 2,
        'dx': 1,
        'dy': 0
    })
    layers.append({'type': 'regression', 'num_neurons': 28 * 28})
    print 'Layers made...'

    n = Net(layers)

    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'sgd', 'batch_size': 20, 'l2_decay': 0.001})
    print 'Trainer made...'
Example 14
def start():
    global training_data, network, t, N

    training_data = load_data()
    print 'Data loaded...'

    layers = []
    layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': N})
    layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'sigmoid'})
    layers.append({'type': 'fc', 'num_neurons': 10, 'activation': 'sigmoid'})
    layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'sigmoid'})
    layers.append({'type': 'regression', 'num_neurons': N})

    print 'Layers made...'

    network = Net(layers)

    print 'Net made...'
    print network

    t = Trainer(network, {
        'method': 'adadelta',
        'batch_size': 4,
        'l2_decay': 0.0001
    })
Example 15
def start():
    global network, t

    layers = []
    layers.append({
        'type': 'input',
        'out_sx': 30,
        'out_sy': 30,
        'out_depth': 1
    })
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': 7})
    print 'Layers made...'

    network = Net(layers)
    print 'Net made...'
    print network

    t = Trainer(network, {
        'method': 'adadelta',
        'batch_size': 20,
        'l2_decay': 0.001
    })
    print 'Trainer made...'
    print t
Example 16
def start():
    global training_data, testing_data, n, t

    training_data = load_data()
    testing_data = load_data(False)

    print 'Data loaded...'

    layers = []
    layers.append({
        'type': 'input',
        'out_sx': 28,
        'out_sy': 28,
        'out_depth': 1
    })
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'regression', 'num_neurons': 28 * 28})
    print 'Layers made...'

    n = Net(layers)

    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'sgd', 'batch_size': 20, 'l2_decay': 0.001})
    print 'Trainer made...'
Example 17
def start():
    global training_data, testing_data, network, t, N

    all_data = load_data()
    shuffle(all_data)
    size = int(len(all_data) * 0.1)
    training_data, testing_data = all_data[size:], all_data[:size]
    print 'Data loaded, size: {}...'.format(len(all_data))

    layers = []
    layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': N})
    layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'sigmoid'})
    layers.append({'type': 'fc', 'num_neurons': 10, 'activation': 'sigmoid'})
    layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': N})

    print 'Layers made...'

    network = Net(layers)

    print 'Net made...'
    print network

    t = Trainer(network, {
        'method': 'adadelta',
        'batch_size': 10,
        'l2_decay': 0.0001
    })
Example 18
def main():
    # Get arguments parsed
    args = get_args()

    # Setup for logging
    output_dir = 'output/{}'.format(
        datetime.now(
            timezone('Asia/Shanghai')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3])
    create_dir(output_dir)
    LogHelper.setup(log_path='{}/training.log'.format(output_dir),
                    level_str='INFO')
    _logger = logging.getLogger(__name__)

    # Save the configuration for logging purpose
    save_yaml_config(args, path='{}/config.yaml'.format(output_dir))

    # Reproducibility
    set_seed(args.seed)

    # Get dataset
    dataset = RealDataset(args.batch_size)
    _logger.info('Finished generating dataset')

    device = get_device()
    model = VAE(args.z_dim, args.num_hidden, args.input_dim, device)

    trainer = Trainer(args.batch_size, args.num_epochs, args.learning_rate)

    trainer.train_model(model=model,
                        dataset=dataset,
                        output_dir=output_dir,
                        device=device,
                        input_dim=args.input_dim)

    _logger.info('Finished training model')

    # Visualizations
    samples = sample_vae(model, args.z_dim, device)
    plot_samples(samples)

    plot_reconstructions(model, dataset, device)

    _logger.info('All Finished!')
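
Example 18's sample_vae helper is not shown; a plausible sketch of what it does, assuming the VAE is an nn.Module exposing a decode() method (both names are assumptions):

import torch

def sample_vae(model, z_dim, device, n_samples=16):
    # draw latent codes from the prior and decode them to data space
    model.eval()
    with torch.no_grad():
        z = torch.randn(n_samples, z_dim, device=device)
        return model.decode(z).cpu()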
Example 19
def run_big_net():
    global training_data, testing_data, n, t, training_data2

    training_data = load_data()
    testing_data = load_data(False)
    training_data2 = []

    print 'Data loaded...'

    layers = []
    layers.append({'type': 'input', 'out_sx': 24, 'out_sy': 24, 'out_depth': 1})
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'relu', 'drop_prob': 0.5})
    #layers.append({'type': 'fc', 'num_neurons': 800, 'activation': 'relu', 'drop_prob': 0.5})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'

    n = Net(layers)
    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'sgd', 'momentum': 0.0})
    print 'Trainer made...'

    print 'In training...'
    print 'k', 'time\t\t  ', 'loss\t  ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data: 
            stats = t.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
            training_data2.append((x, n.getPrediction()))
    except: #hit control-c or other
        pass

    print 'In testing: 5000 trials'
    right = 0
    count = 5000
    for x, y in sample(testing_data, count):
        n.forward(x)
        right += n.getPrediction() == y
    accuracy = float(right) / count * 100
    print accuracy
Example 20
def run(config, norm2d):

    train_loader, valid_loader = cifar10_loader(config.root, config.batch_size)

    model = CIFAR10Net(norm2d=norm2d)
    if config.cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)

    trainer = Trainer(model,
                      optimizer,
                      train_loader,
                      valid_loader,
                      use_cuda=config.cuda)

    valid_acc_list = []
    for epoch in range(config.epochs):
        start = time()

        scheduler.step()

        train_loss, train_acc = trainer.train(epoch)
        valid_loss, valid_acc = trainer.validate()

        print(
            'epoch: {}/{},'.format(epoch + 1, config.epochs),
            'train loss: {:.4f}, train acc: {:.2f}%,'.format(
                train_loss, train_acc * 100),
            'valid loss: {:.4f}, valid acc: {:.2f}%,'.format(
                valid_loss,
                valid_acc * 100), 'time: {:.2f}s'.format(time() - start))

        save_dir = os.path.join(config.save_dir, norm2d)
        os.makedirs(save_dir, exist_ok=True)
        torch.save(model.state_dict(),
                   os.path.join(save_dir, 'model_{:04d}.pt'.format(epoch + 1)))

        valid_acc_list.append(valid_acc)

    return valid_acc_list
Example 21
def run_small_net():
    global training_data2, n2, t2, testing_data

    layers = []
    layers.append({
        'type': 'input',
        'out_sx': 24,
        'out_sy': 24,
        'out_depth': 1
    })
    #layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'relu'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'

    n2 = Net(layers)
    print 'Smaller Net made...'
    print n2

    t2 = Trainer(n2, {'method': 'sgd', 'momentum': 0.0})
    print 'Trainer made for smaller net...'

    print 'In training of smaller net...'
    print 'k', 'time\t\t  ', 'loss\t  ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data2:
            stats = t2.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
    except:  #hit control-c or other
        pass

    print 'Testing smaller net: 5000 trials'
    right = 0
    count = 5000
    for x, y in sample(testing_data, count):
        n2.forward(x)
        right += n2.getPrediction() == y
    accuracy = float(right) / count * 100
    print accuracy
Example 22
def start():
    global network, sgd

    layers = []
    layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': 4})
    layers.append({'type': 'softmax', 'num_classes': 3})  #svm works too
    print 'Layers made...'

    network = Net(layers)
    print 'Net made...'
    print network

    sgd = Trainer(network, {'momentum': 0.1, 'l2_decay': 0.001})
    print 'Trainer made...'
    print sgd
Example 23
def start():
    global network, sgd

    layers = []
    layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': 7})
    #layers.append({'type': 'fc', 'num_neurons': 30, 'activation': 'relu'})
    #layers.append({'type': 'fc', 'num_neurons': 30, 'activation': 'relu'})
    layers.append({'type': 'softmax', 'num_classes': 2})  #svm works too
    print 'Layers made...'

    network = Net(layers)
    print 'Net made...'
    print network

    sgd = Trainer(network, {'momentum': 0.2, 'l2_decay': 0.001})
    print 'Trainer made...'
    print sgd
Example 24
def test_trainer(model, optimizer, scheduler, data_loader, criterion):

    args = DictConfig({'experiment': {'debug': False}})
    App.init(args)
    output_dirpath = Path.cwd()

    # basic training run of model
    pre_training_model = copy.deepcopy(model)
    logger = Logger()
    trainer = Trainer(model=model,
                      train_dataloader=data_loader,
                      val_dataloader=data_loader,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      epochs=2,
                      logger=logger,
                      debug=True,
                      criterion=criterion,
                      output_path=output_dirpath)
    trainer.train()
    check_variable_change(pre_training_model, model)
    assert trainer.epochs_trained == 2
    assert len(trainer.logger.metrics_dict) == 4
    assert (0 <= trainer.logger.metrics_dict['accuracy'] <= 1)

    # check that can load from checkpoint
    pre_training_model = copy.deepcopy(model)
    trainer = Trainer(model=model,
                      train_dataloader=data_loader,
                      val_dataloader=data_loader,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      epochs=4,
                      logger=logger,
                      debug=True,
                      criterion=criterion,
                      checkpoint='last.pth',
                      output_path=output_dirpath)
    trainer.train()
    os.remove('last.pth')
    os.remove('best.pth')
    check_variable_change(pre_training_model, model)
    assert trainer.epochs_trained == 4
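
Example 24 relies on a check_variable_change helper from its test suite; a hedged sketch of what such a check usually does, assuming it compares parameters before and after training:

import torch

def check_variable_change(model_before, model_after):
    # assert that training actually moved at least one parameter tensor
    changed = any(
        not torch.equal(p_before, p_after)
        for p_before, p_after in zip(model_before.parameters(), model_after.parameters())
    )
    assert changed, "no parameters were updated during training"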
Example 25
def main(not_parsed_args):
    if len(not_parsed_args) > 1:
        print("Unknown args:%s" % not_parsed_args)
        exit()
    dpt = os.path.join(DATA_DIR, FLAGS.data_file)
    testp = os.path.join(DATA_DIR, FLAGS.test_file)
    DL = MnistLoader(testp, FLAGS, dpt)
    md = ConvLSTMNetwork(FLAGS)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    sess = tf.Session(config=config, graph=md.graph)
    print('Sess created.')
    trainer = Trainer(DL, md, sess, FLAGS)
    trainer.load_model()
    trainer.train()
    sess.close()
Example 26
def start(conv):
    global training_data, testing_data, n, t

    training_data = load_data()
    testing_data = load_data(False)

    print 'Data loaded...'

    layers = []
    layers.append({
        'type': 'input',
        'out_sx': 24,
        'out_sy': 24,
        'out_depth': 1
    })
    if conv:
        layers.append({
            'type': 'conv',
            'sx': 5,
            'filters': 8,
            'stride': 1,
            'pad': 2,
            'activation': 'relu',
            'drop_prob': 0.5
        })
        layers.append({'type': 'pool', 'sx': 2, 'stride': 2, 'drop_prob': 0.5})
    else:
        layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'relu'})
        #layers.append({'type': 'sim', 'num_neurons': 100, 'activation': 'mex'})
    layers.append({'type': 'softmax', 'num_classes': 10})

    print 'Layers made...'

    n = Net(layers)

    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'adadelta', 'batch_size': 20, 'l2_decay': 0.001})
    print 'Trainer made...'
Example 27
def start():
    global training_data, n, t

    training_data = load_data()

    print 'Data loaded...'

    layers = []
    layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': 255})
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': 255})

    print 'Layers made...'

    n = Net(layers)

    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'adadelta', 'batch_size': 10, 'l2_decay': 0.0001});

    print 'Trainer made...'
Example 28
    def __init__(self, num_states, num_actions, opt={}):
        """
        in number of time steps, of temporal memory
        the ACTUAL input to the net will be (x,a) temporal_window times, and followed by current x
        so to have no information from previous time step going into value function, set to 0.
        """
        self.temporal_window = getopt(opt, 'temporal_window', 1)
        """size of experience replay memory"""
        self.experience_size = getopt(opt, 'experience_size', 30000)
        """number of examples in experience replay memory before we begin learning"""
        self.start_learn_threshold = getopt(
            opt, 'start_learn_threshold',
            int(min(self.experience_size * 0.1, 1000)))
        """gamma is a crucial parameter that controls how much plan-ahead the agent does. In [0,1]"""
        self.gamma = getopt(opt, 'gamma', 0.8)
        """number of steps we will learn for"""
        self.learning_steps_total = getopt(opt, 'learning_steps_total', 100000)
        """how many steps of the above to perform only random actions (in the beginning)?"""
        self.learning_steps_burnin = getopt(opt, 'learning_steps_burnin', 3000)
        """what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end"""
        self.epsilon_min = getopt(opt, 'epsilon_min', 0.05)
        """what epsilon to use at test time? (i.e. when learning is disabled)"""
        self.epsilon_test_time = getopt(opt, 'epsilon_test_time', 0.01)
        """
        advanced feature. Sometimes a random action should be biased towards some values
        for example in flappy bird, we may want to choose to not flap more often
        """
        if 'random_action_distribution' in opt:
            #this better sum to 1 by the way, and be of length this.num_actions
            self.random_action_distribution = opt['random_action_distribution']

            if len(self.random_action_distribution) != num_actions:
                print 'TROUBLE. random_action_distribution should be same length as num_actions.'

            a = self.random_action_distribution
            s = sum(a)
            if abs(s - 1.0) > 0.0001:
                print 'TROUBLE. random_action_distribution should sum to 1!'
        else:
            #no distribution supplied: fall back to uniformly random actions
            self.random_action_distribution = []
        """
        states that go into neural net to predict optimal action look as
        x0,a0,x1,a1,x2,a2,...xt
        this variable controls the size of that temporal window. Actions are
        encoded as 1-of-k hot vectors
        """
        self.net_inputs = num_states * self.temporal_window + num_actions * self.temporal_window + num_states
        self.num_states = num_states
        self.num_actions = num_actions
        self.window_size = max(
            self.temporal_window,
            2)  #must be at least 2, but if we want more context even more
        self.state_window = zeros(self.window_size)
        self.action_window = zeros(self.window_size)
        self.reward_window = zeros(self.window_size)
        self.net_window = zeros(self.window_size)

        #create [state -> value of all possible actions] modeling net for the value function
        layers = []
        if 'layers' in opt:
            """
            this is an advanced usage feature, because size of the input to the network, and number of
            actions must check out. 
            """
            layers = opt['layers']

            if len(layers) < 2:
                print 'TROUBLE! must have at least 2 layers'
            if layers[0]['type'] != 'input':
                print 'TROUBLE! first layer must be input layer!'
            if layers[-1]['type'] != 'regression':
                print 'TROUBLE! last layer must be a regression layer!'
            if layers[0]['out_depth'] * layers[0]['out_sx'] * layers[0][
                    'out_sy'] != self.net_inputs:
                print 'TROUBLE! Number of inputs must be num_states * temporal_window + num_actions * temporal_window + num_states!'
            if layers[-1]['num_neurons'] != self.num_actions:
                print 'TROUBLE! Number of regression neurons should be num_actions!'
        else:
            #create a very simple neural net by default
            layers.append({
                'type': 'input',
                'out_sx': 1,
                'out_sy': 1,
                'out_depth': self.net_inputs
            })
            if 'hidden_layer_sizes' in opt:
                #allow user to specify this via the option, for convenience
                for size in opt['hidden_layer_sizes']:
                    layers.append({
                        'type': 'fc',
                        'num_neurons': size,
                        'activation': 'relu'
                    })
            layers.append({
                'type': 'regression',
                'num_neurons': self.num_actions
            })  #value function output

        self.value_net = Net(layers)

        #and finally we need a Temporal Difference Learning trainer!
        trainer_ops_default = {
            'learning_rate': 0.01,
            'momentum': 0.0,
            'batch_size': 64,
            'l2_decay': 0.01
        }
        tdtrainer_options = getopt(opt, 'tdtrainer_options',
                                   trainer_ops_default)
        self.tdtrainer = Trainer(self.value_net, tdtrainer_options)

        #experience replay
        self.experience = []

        #various housekeeping variables
        self.age = 0  #incremented every backward()
        self.forward_passes = 0  #incremented every forward()
        self.epsilon = 1.0  #controls exploration exploitation tradeoff. Should be annealed over time
        self.latest_reward = 0
        self.last_input_array = []
        self.average_reward_window = Window(1000, 10)
        self.average_loss_window = Window(1000, 10)
        self.learning = True
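
The getopt helper used throughout Examples 28-32 is not shown; given how it is called, it behaves like dict.get with a default (a sketch, not the library's own code):

def getopt(opt, field_name, default_value):
    # return opt[field_name] if the option was supplied, else the default
    return opt[field_name] if field_name in opt else default_value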
Example 29
class Brain(object):
    def __init__(self, num_states, num_actions, opt={}):
        """
        in number of time steps, of temporal memory
        the ACTUAL input to the net will be (x,a) temporal_window times, and followed by current x
        so to have no information from previous time step going into value function, set to 0.
        """
        self.temporal_window = getopt(opt, 'temporal_window', 1)
        """size of experience replay memory"""
        self.experience_size = getopt(opt, 'experience_size', 30000)
        """number of examples in experience replay memory before we begin learning"""
        self.start_learn_threshold = getopt(
            opt, 'start_learn_threshold',
            int(min(self.experience_size * 0.1, 1000)))
        """gamma is a crucial parameter that controls how much plan-ahead the agent does. In [0,1]"""
        self.gamma = getopt(opt, 'gamma', 0.8)
        """number of steps we will learn for"""
        self.learning_steps_total = getopt(opt, 'learning_steps_total', 100000)
        """how many steps of the above to perform only random actions (in the beginning)?"""
        self.learning_steps_burnin = getopt(opt, 'learning_steps_burnin', 3000)
        """what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end"""
        self.epsilon_min = getopt(opt, 'epsilon_min', 0.05)
        """what epsilon to use at test time? (i.e. when learning is disabled)"""
        self.epsilon_test_time = getopt(opt, 'epsilon_test_time', 0.01)
        """
        advanced feature. Sometimes a random action should be biased towards some values
        for example in flappy bird, we may want to choose to not flap more often
        """
        if 'random_action_distribution' in opt:
            #this better sum to 1 by the way, and be of length this.num_actions
            self.random_action_distribution = opt['random_action_distribution']

            if len(self.random_action_distribution) != num_actions:
                print 'TROUBLE. random_action_distribution should be same length as num_actions.'

            a = self.random_action_distribution
            s = sum(a)
            if abs(s - 1.0) > 0.0001:
                print 'TROUBLE. random_action_distribution should sum to 1!'
        else:
            #no distribution supplied: fall back to uniformly random actions
            self.random_action_distribution = []
        """
        states that go into neural net to predict optimal action look as
        x0,a0,x1,a1,x2,a2,...xt
        this variable controls the size of that temporal window. Actions are
        encoded as 1-of-k hot vectors
        """
        self.net_inputs = num_states * self.temporal_window + num_actions * self.temporal_window + num_states
        self.num_states = num_states
        self.num_actions = num_actions
        self.window_size = max(
            self.temporal_window,
            2)  #must be at least 2, but if we want more context even more
        self.state_window = zeros(self.window_size)
        self.action_window = zeros(self.window_size)
        self.reward_window = zeros(self.window_size)
        self.net_window = zeros(self.window_size)

        #create [state -> value of all possible actions] modeling net for the value function
        layers = []
        if 'layers' in opt:
            """
            this is an advanced usage feature, because size of the input to the network, and number of
            actions must check out. 
            """
            layers = opt['layers']

            if len(layers) < 2:
                print 'TROUBLE! must have at least 2 layers'
            if layers[0]['type'] != 'input':
                print 'TROUBLE! first layer must be input layer!'
            if layers[-1]['type'] != 'regression':
                print 'TROUBLE! last layer must be a regression layer!'
            if layers[0]['out_depth'] * layers[0]['out_sx'] * layers[0][
                    'out_sy'] != self.net_inputs:
                print 'TROUBLE! Number of inputs must be num_states * temporal_window + num_actions * temporal_window + num_states!'
            if layers[-1]['num_neurons'] != self.num_actions:
                print 'TROUBLE! Number of regression neurons should be num_actions!'
        else:
            #create a very simple neural net by default
            layers.append({
                'type': 'input',
                'out_sx': 1,
                'out_sy': 1,
                'out_depth': self.net_inputs
            })
            if 'hidden_layer_sizes' in opt:
                #allow user to specify this via the option, for convenience
                for size in opt['hidden_layer_sizes']:
                    layers.append({
                        'type': 'fc',
                        'num_neurons': size,
                        'activation': 'relu'
                    })
            layers.append({
                'type': 'regression',
                'num_neurons': self.num_actions
            })  #value function output

        self.value_net = Net(layers)

        #and finally we need a Temporal Difference Learning trainer!
        trainer_ops_default = {
            'learning_rate': 0.01,
            'momentum': 0.0,
            'batch_size': 64,
            'l2_decay': 0.01
        }
        tdtrainer_options = getopt(opt, 'tdtrainer_options',
                                   trainer_ops_default)
        self.tdtrainer = Trainer(self.value_net, tdtrainer_options)

        #experience replay
        self.experience = []

        #various housekeeping variables
        self.age = 0  #incremented every backward()
        self.forward_passes = 0  #incremented every forward()
        self.epsilon = 1.0  #controls exploration exploitation tradeoff. Should be annealed over time
        self.latest_reward = 0
        self.last_input_array = []
        self.average_reward_window = Window(1000, 10)
        self.average_loss_window = Window(1000, 10)
        self.learning = True

    def random_action(self):
        """
        a bit of a helper function. It returns a random action
        we are abstracting this away because in future we may want to 
        do more sophisticated things. For example some actions could be more
        or less likely at "rest"/default state.
        """

        if len(self.random_action_distribution) == 0:
            return randi(0, self.num_actions)
        else:
            #okay, lets do some fancier sampling
            p = randf(0, 1.0)
            cumprob = 0.0
            for k in xrange(self.num_actions):
                cumprob += self.random_action_distribution[k]
                if p < cumprob:
                    return k

    def policy(self, s):
        """
        compute the value of doing any action in this state
        and return the argmax action and its value
        """

        V = Vol(s)
        action_values = self.value_net.forward(V)
        weights = action_values.w
        max_val = max(weights)
        max_k = weights.index(max_val)
        return {'action': max_k, 'value': max_val}

    def getNetInput(self, xt):
        """
        return s = (x,a,x,a,x,a,xt) state vector
        It's a concatenation of last window_size (x,a) pairs and current state x
        """

        w = []
        w.extend(xt)  #start with current state
        #and now go backwards and append states and actions from history temporal_window times
        n = self.window_size
        for k in xrange(self.temporal_window):
            index = n - 1 - k
            w.extend(self.state_window[index])  #state

            #action, encoded as 1-of-k indicator vector. We scale it up a bit because
            #we dont want weight regularization to undervalue this information, as it only exists once
            action1ofk = zeros(self.num_actions)
            action1ofk[self.action_window[index]] = 1.0 * self.num_states
            w.extend(action1ofk)

        return w

    def forward(self, input_array):
        self.forward_passes += 1
        self.last_input_array = input_array

        # create network input
        action = None
        if self.forward_passes > self.temporal_window:
            #we have enough to actually do something reasonable
            net_input = self.getNetInput(input_array)
            if self.learning:
                #compute epsilon for the epsilon-greedy policy
                self.epsilon = min(
                    1.0,
                    max(
                        self.epsilon_min,
                        1.0 - \
                        float(self.age - self.learning_steps_burnin) / \
                        (self.learning_steps_total - self.learning_steps_burnin)
                    )
                )
            else:
                self.epsilon = self.epsilon_test_time  #use test-time value

            rf = randf(0, 1)
            if rf < self.epsilon:
                #choose a random action with epsilon probability
                action = self.random_action()
            else:
                #otherwise use our policy to make decision
                maxact = self.policy(net_input)
                action = maxact['action']
        else:
            #pathological case that happens first few iterations
            #before we accumulate window_size inputs
            net_input = []
            action = self.random_action()

        #remember the state and action we took for backward pass
        self.net_window.pop(0)
        self.net_window.append(net_input)
        self.state_window.pop(0)
        self.state_window.append(input_array)
        self.action_window.pop(0)
        self.action_window.append(action)

    def backward(self, reward):
        self.latest_reward = reward
        self.average_reward_window.add(reward)
        self.reward_window.pop(0)
        self.reward_window.append(reward)

        if not self.learning:
            return

        self.age += 1

        #it is time t+1 and we have to store (s_t, a_t, r_t, s_{t+1}) as new experience
        #(given that an appropriate number of state measurements already exist, of course)
        if self.forward_passes > self.temporal_window + 1:
            n = self.window_size
            e = Experience(self.net_window[n - 2], self.action_window[n - 2],
                           self.reward_window[n - 2], self.net_window[n - 1])

            if len(self.experience) < self.experience_size:
                self.experience.append(e)
            else:
                ri = randi(0, self.experience_size)
                self.experience[ri] = e

        #learn based on experience, once we have some samples to go on
        #this is where the magic happens...
        if len(self.experience) > self.start_learn_threshold:
            avcost = 0.0

            for k in xrange(self.tdtrainer.batch_size):
                re = randi(0, len(self.experience))
                e = self.experience[re]
                x = Vol(1, 1, self.net_inputs)
                x.w = e.state0
                maxact = self.policy(e.state1)
                r = e.reward0 + self.gamma * maxact['value']
                ystruct = {'dim': e.action0, 'val': r}
                stats = self.tdtrainer.train(x, ystruct)
                avcost += stats['loss']

            avcost /= self.tdtrainer.batch_size
            self.average_loss_window.add(avcost)
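
The epsilon-greedy schedule in Brain.forward() anneals epsilon linearly from 1.0 down to epsilon_min between learning_steps_burnin and learning_steps_total. A worked illustration with the default values (3000, 100000, 0.05):

def epsilon_at(age, burnin=3000, total=100000, eps_min=0.05):
    # same expression as in Brain.forward(), with explicit float division
    return min(1.0, max(eps_min, 1.0 - float(age - burnin) / (total - burnin)))

for age in (0, 3000, 51500, 100000):
    print("age=%d epsilon=%.2f" % (age, epsilon_at(age)))
# age=0 and age=3000 give 1.00, the halfway point gives 0.50, the end gives 0.05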
Example 30
    
    
    agent = create_third_level_agent(concept_path, args.load_concept_id, args.n_concepts, noisy=noisy, 
        n_heads=n_heads, init_log_alpha=args.init_log_alpha, latent_dim=args.vision_latent_dim, 
        parallel=args.parallel_q_nets, lr=args.lr, lr_alpha=args.lr_alpha, lr_actor=args.lr_actor, min_entropy_factor=args.entropy_factor, 
        lr_c=args.lr_c, lr_Alpha=args.lr_c_Alpha, entropy_update_rate=args.entropy_update_rate, init_Epsilon=args.init_epsilon_MC,
        delta_Epsilon=args.delta_epsilon_MC)
    
    if args.load_id is not None:
        if args.load_best:
            agent.load(MODEL_PATH + env_name + '/best_', args.load_id)
        else:
            agent.load(MODEL_PATH + env_name + '/last_', args.load_id)
    agents = collections.deque(maxlen=args.n_agents)
    agents.append(agent)
    
    os.makedirs(MODEL_PATH + env_name, exist_ok=True)

    database = ExperienceBuffer(buffer_size, level=2)

    trainer = Trainer(optimizer_kwargs=optimizer_kwargs)
    returns = trainer.loop(env, agents, database, n_episodes=n_episodes, render=args.render, 
                            max_episode_steps=n_steps_in_second_level_episode, 
                            store_video=store_video, wandb_project=wandb_project, 
                            MODEL_PATH=MODEL_PATH, train=(not args.eval),
                            initialization=initialization, init_buffer_size=init_buffer_size,
                            save_step_each=save_step_each, train_each=args.train_each, 
                            n_step_td=n_step_td, train_n_MC=args.train_n_mc, rest_n_MC=args.rest_n_mc,
                            eval_MC=args.eval_MC)
    G = returns.mean()    
    print("Mean episode return: {:.2f}".format(G)) 
Example 31
    def __init__(self, num_states, num_actions, opt={}):
        """
        in number of time steps, of temporal memory
        the ACTUAL input to the net will be (x,a) temporal_window times, and followed by current x
        so to have no information from previous time step going into value function, set to 0.
        """
        self.temporal_window = getopt(opt, 'temporal_window', 1)

        """size of experience replay memory"""
        self.experience_size = getopt(opt, 'experience_size', 30000)

        """number of examples in experience replay memory before we begin learning"""
        self.start_learn_threshold = getopt(opt, 'start_learn_threshold',
                                            int(min(self.experience_size * 0.1, 1000)))

        """gamma is a crucial parameter that controls how much plan-ahead the agent does. In [0,1]"""
        self.gamma = getopt(opt, 'gamma', 0.8)

        """number of steps we will learn for"""
        self.learning_steps_total = getopt(opt, 'learning_steps_total', 100000)

        """how many steps of the above to perform only random actions (in the beginning)?"""
        self.learning_steps_burnin = getopt(opt, 'learning_steps_burnin', 3000)

        """what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end"""
        self.epsilon_min = getopt(opt, 'epsilon_min', 0.05)

        """what epsilon to use at test time? (i.e. when learning is disabled)"""
        self.epsilon_test_time = getopt(opt, 'epsilon_test_time', 0.01)

        """
        advanced feature. Sometimes a random action should be biased towards some values
        for example in flappy bird, we may want to choose to not flap more often
        """
        if 'random_action_distribution' in opt:
            #this better sum to 1 by the way, and be of length this.num_actions
            self.random_action_distribution = opt['random_action_distribution']

            if len(self.random_action_distribution) != num_actions:
                print 'TROUBLE. random_action_distribution should be same length as num_actions.'

            a = self.random_action_distribution
            s = sum(a)
            if abs(s - 1.0) > 0.0001:
                print 'TROUBLE. random_action_distribution should sum to 1!'
        else:
            #no distribution supplied: fall back to uniformly random actions
            self.random_action_distribution = []

        """
        states that go into neural net to predict optimal action look as
        x0,a0,x1,a1,x2,a2,...xt
        this variable controls the size of that temporal window. Actions are
        encoded as 1-of-k hot vectors
        """
        self.net_inputs = num_states * self.temporal_window + num_actions * self.temporal_window + num_states        
        self.num_states = num_states
        self.num_actions = num_actions
        self.window_size = max(self.temporal_window, 2) #must be at least 2, but if we want more context even more
        self.state_window = zeros(self.window_size)
        self.action_window = zeros(self.window_size)
        self.reward_window = zeros(self.window_size)
        self.net_window = zeros(self.window_size)

        #create [state -> value of all possible actions] modeling net for the value function
        layers = []
        if 'layers' in opt:
            """
            this is an advanced usage feature, because size of the input to the network, and number of
            actions must check out. 
            """
            layers = opt['layers']

            if len(layers) < 2:
                print 'TROUBLE! must have at least 2 layers'
            if layers[0]['type'] != 'input':
                print 'TROUBLE! first layer must be input layer!'
            if layers[-1]['type'] != 'regression':
                print 'TROUBLE! last layer must be a regression layer!'
            if layers[0]['out_depth'] * layers[0]['out_sx'] * layers[0]['out_sy'] != self.net_inputs:
                print 'TROUBLE! Number of inputs must be num_states * temporal_window + num_actions * temporal_window + num_states!'
            if layers[-1]['num_neurons'] != self.num_actions:
                print 'TROUBLE! Number of regression neurons should be num_actions!'
        else:
            #create a very simple neural net by default
            layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': self.net_inputs})
            if 'hidden_layer_sizes' in opt:
                #allow user to specify this via the option, for convenience
                for size in opt['hidden_layer_sizes']:
                    layers.append({'type': 'fc', 'num_neurons': size, 'activation': 'relu'})
            layers.append({'type': 'regression', 'num_neurons': self.num_actions}) #value function output

        self.value_net = Net(layers)

        #and finally we need a Temporal Difference Learning trainer!
        trainer_ops_default = {'learning_rate': 0.01, 'momentum': 0.0, 'batch_size': 64, 'l2_decay': 0.01}
        tdtrainer_options = getopt(opt, 'tdtrainer_options', trainer_ops_default)
        self.tdtrainer = Trainer(self.value_net, tdtrainer_options)

        #experience replay
        self.experience = []

        #various housekeeping variables
        self.age = 0            #incremented every backward()
        self.forward_passes = 0 #incremented every forward()
        self.epsilon = 1.0      #controls exploration exploitation tradeoff. Should be annealed over time
        self.latest_reward = 0
        self.last_input_array = []
        self.average_reward_window = Window(1000, 10)
        self.average_loss_window = Window(1000, 10)
        self.learning = True
Example 32
class Brain(object):

    def __init__(self, num_states, num_actions, opt={}):
        """
        in number of time steps, of temporal memory
        the ACTUAL input to the net will be (x,a) temporal_window times, and followed by current x
        so to have no information from previous time step going into value function, set to 0.
        """
        self.temporal_window = getopt(opt, 'temporal_window', 1)

        """size of experience replay memory"""
        self.experience_size = getopt(opt, 'experience_size', 30000)

        """number of examples in experience replay memory before we begin learning"""
        self.start_learn_threshold = getopt(opt, 'start_learn_threshold',
                                            int(min(self.experience_size * 0.1, 1000)))

        """gamma is a crucial parameter that controls how much plan-ahead the agent does. In [0,1]"""
        self.gamma = getopt(opt, 'gamma', 0.8)

        """number of steps we will learn for"""
        self.learning_steps_total = getopt(opt, 'learning_steps_total', 100000)

        """how many steps of the above to perform only random actions (in the beginning)?"""
        self.learning_steps_burnin = getopt(opt, 'learning_steps_burnin', 3000)

        """what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end"""
        self.epsilon_min = getopt(opt, 'epsilon_min', 0.05)

        """what epsilon to use at test time? (i.e. when learning is disabled)"""
        self.epsilon_test_time = getopt(opt, 'epsilon_test_time', 0.01)

        """
        advanced feature. Sometimes a random action should be biased towards some values
        for example in flappy bird, we may want to choose to not flap more often
        """
        if 'random_action_distribution' in opt:
            #this better sum to 1 by the way, and be of length this.num_actions
            self.random_action_distribution = opt['random_action_distribution']

            if len(self.random_action_distribution) != num_actions:
                print 'TROUBLE. random_action_distribution should be same length as num_actions.'

            a = self.random_action_distribution
            s = sum(a)
            if abs(s - 1.0) > 0.0001:
                print 'TROUBLE. random_action_distribution should sum to 1!'
        else:
            #no distribution supplied: fall back to uniformly random actions
            self.random_action_distribution = []

        """
        states that go into neural net to predict optimal action look as
        x0,a0,x1,a1,x2,a2,...xt
        this variable controls the size of that temporal window. Actions are
        encoded as 1-of-k hot vectors
        """
        self.net_inputs = num_states * self.temporal_window + num_actions * self.temporal_window + num_states        
        self.num_states = num_states
        self.num_actions = num_actions
        self.window_size = max(self.temporal_window, 2) #must be at least 2, but if we want more context even more
        self.state_window = zeros(self.window_size)
        self.action_window = zeros(self.window_size)
        self.reward_window = zeros(self.window_size)
        self.net_window = zeros(self.window_size)

        #create [state -> value of all possible actions] modeling net for the value function
        layers = []
        if 'layers' in opt:
            """
            this is an advanced usage feature, because size of the input to the network, and number of
            actions must check out. 
            """
            layers = opt['layers']

            if len(layers) < 2:
                print 'TROUBLE! must have at least 2 layers'
            if layers[0]['type'] != 'input':
                print 'TROUBLE! first layer must be input layer!'
            if layers[-1]['type'] != 'regression':
                print 'TROUBLE! last layer must be a regression layer!'
            if layers[0]['out_depth'] * layers[0]['out_sx'] * layers[0]['out_sy'] != self.net_inputs:
                print 'TROUBLE! Number of inputs must be num_states * temporal_window + num_actions * temporal_window + num_states!'
            if layers[-1]['num_neurons'] != self.num_actions:
                print 'TROUBLE! Number of regression neurons should be num_actions!'
        else:
            #create a very simple neural net by default
            layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': self.net_inputs})
            if 'hidden_layer_sizes' in opt:
                #allow user to specify this via the option, for convenience
                for size in opt['hidden_layer_sizes']:
                    layers.append({'type': 'fc', 'num_neurons': size, 'activation': 'relu'})
            layers.append({'type': 'regression', 'num_neurons': self.num_actions}) #value function output

        self.value_net = Net(layers)

        #and finally we need a Temporal Difference Learning trainer!
        trainer_ops_default = {'learning_rate': 0.01, 'momentum': 0.0, 'batch_size': 64, 'l2_decay': 0.01}
        tdtrainer_options = getopt(opt, 'tdtrainer_options', trainer_ops_default)
        self.tdtrainer = Trainer(self.value_net, tdtrainer_options)

        #experience replay
        self.experience = []

        #various housekeeping variables
        self.age = 0            #incremented every backward()
        self.forward_passes = 0 #incremented every forward()
        self.epsilon = 1.0      #controls exploration exploitation tradeoff. Should be annealed over time
        self.latest_reward = 0
        self.last_input_array = []
        self.average_reward_window = Window(1000, 10)
        self.average_loss_window = Window(1000, 10)
        self.learning = True

    def random_action(self):
        """
        a bit of a helper function. It returns a random action
        we are abstracting this away because in future we may want to 
        do more sophisticated things. For example some actions could be more
        or less likely at "rest"/default state.
        """

        if len(self.random_action_distribution) == 0:
            return randi(0, self.num_actions)
        else:
            #okay, lets do some fancier sampling
            p = randf(0, 1.0)
            cumprob = 0.0
            for k in xrange(self.num_actions):
                cumprob += self.random_action_distribution[k]
                if p < cumprob:
                    return k

    def policy(self, s):
        """
        compute the value of doing any action in this state
        and return the argmax action and its value
        """

        V = Vol(s)
        action_values = self.value_net.forward(V)
        weights = action_values.w
        max_val = max(weights)
        max_k = weights.index(max_val)
        return {
            'action': max_k,
            'value': max_val
        }

    def getNetInput(self, xt):
        """
        return s = (x,a,x,a,x,a,xt) state vector
        It's a concatenation of the last temporal_window (x,a) pairs and the current state x
        """

        w = []
        w.extend(xt) #start with current state
        #and now go backwards and append states and actions from history temporal_window times
        n = self.window_size
        for k in xrange(self.temporal_window):
            index = n - 1 - k
            w.extend(self.state_window[index]) #state

            #action, encoded as 1-of-k indicator vector. We scale it up a bit because
            #we dont want weight regularization to undervalue this information, as it only exists once
            action1ofk = zeros(self.num_actions)
            action1ofk[self.action_window[index]] = 1.0 * self.num_states
            w.extend(action1ofk)

        return w

    def forward(self, input_array):
        self.forward_passes += 1
        self.last_input_array = input_array

        # create network input
        action = None
        if self.forward_passes > self.temporal_window:
            #we have enough to actually do something reasonable
            net_input = self.getNetInput(input_array)
            if self.learning:
                #compute epsilon for the epsilon-greedy policy
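                #linear anneal: stays at 1.0 while age < learning_steps_burnin,
                #then decays with age, bottoming out at epsilon_min by learning_steps_total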
                self.epsilon = min(
                    1.0,
                    max(
                        self.epsilon_min,
                        1.0 -
                        float(self.age - self.learning_steps_burnin) /
                        (self.learning_steps_total - self.learning_steps_burnin)
                    )
                )
            else:
                self.epsilon = self.epsilon_test_time #use test-time value
            
            rf = randf(0, 1)
            if rf < self.epsilon:
                #choose a random action with epsilon probability
                action = self.random_action()
            else:
                #otherwise use our policy to make decision
                maxact = self.policy(net_input)
                action = maxact['action']
        else:
            #pathological case that happens first few iterations
            #before we accumulate window_size inputs
            net_input = []
            action = self.random_action()

        #remember the state and action we took for backward pass
        self.net_window.pop(0)
        self.net_window.append(net_input)
        self.state_window.pop(0)
        self.state_window.append(input_array)
        self.action_window.pop(0)
        self.action_window.append(action)

        return action

    def backward(self, reward):
        self.latest_reward = reward
        self.average_reward_window.add(reward)
        self.reward_window.pop(0)
        self.reward_window.append(reward)

        if not self.learning: 
            return

        self.age += 1

        #it is time t+1 and we have to store (s_t, a_t, r_t, s_{t+1}) as new experience
        #(given that an appropriate number of state measurements already exist, of course)
        if self.forward_passes > self.temporal_window + 1:
            n = self.window_size
            e = Experience(
                self.net_window[n - 2],
                self.action_window[n - 2],
                self.reward_window[n - 2],
                self.net_window[n - 1]
            )

            if len(self.experience) < self.experience_size:
                self.experience.append(e)
            else:
                ri = randi(0, self.experience_size)
                self.experience[ri] = e

        #learn based on experience, once we have some samples to go on
        #this is where the magic happens...
        if len(self.experience) > self.start_learn_threshold:
            avcost = 0.0

            for k in xrange(self.tdtrainer.batch_size):
                re = randi(0, len(self.experience))
                e = self.experience[re]
                x = Vol(1, 1, self.net_inputs)
                x.w = e.state0
                maxact = self.policy(e.state1)
                r = e.reward0 + self.gamma * maxact['value']
                ystruct = {'dim': e.action0, 'val': r}
                stats = self.tdtrainer.train(x, ystruct)
                avcost += stats['loss']

            avcost /= self.tdtrainer.batch_size
            self.average_loss_window.add(avcost)
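
A minimal usage sketch for the agent above (hypothetical: the enclosing class is assumed to be constructed as Brain(num_states, num_actions, opt), and env stands in for any user-supplied environment; neither is defined in this snippet):

#hypothetical driver loop; Brain's constructor signature and env's API are assumptions
opt = {'hidden_layer_sizes': [50, 50]}   #only an option shown above is used
brain = Brain(9, 5, opt)                 #9 state inputs, 5 discrete actions

state = env.reset()                      #env is a stand-in, not part of the snippet
for step in xrange(100000):
    action = brain.forward(state)        #epsilon-greedy action from the value net
    state, reward = env.step(action)     #advance the assumed environment
    brain.backward(reward)               #store the experience and run one TD training batch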
        "--vision_latent_dim",
        default=DEFAULT_VISION_LATENT_DIM,
        help="Dimensionality of feature vector added to inner state, default="
        + str(DEFAULT_VISION_LATENT_DIM))
    args = parser.parse_args()

    render_kwargs = {
        'pixels': {
            'width': 168,
            'height': 84,
            'camera_name': 'front_camera'
        }
    }

    database = ExperienceBuffer(args.buffer_size, level=3)
    trainer = Trainer()

    env_model_pairs = load_env_model_pairs(args.file)
    n_envs = len(env_model_pairs)
    n_episodes = (args.buffer_size * args.save_step_each) // args.n_steps
    store_video = False

    for env_number, (env_name, model_id) in enumerate(env_model_pairs.items()):
        task_database = ExperienceBuffer(args.buffer_size // n_envs, level=2)

        env = AntPixelWrapper(
            PixelObservationWrapper(gym.make(env_name).unwrapped,
                                    pixels_only=False,
                                    render_kwargs=render_kwargs.copy()))

        agent = load_agent(env_name, model_id, args.load_best,
Esempio n. 34
0
                           dim_z_motion, video_length)

image_discriminator = build_discriminator(image_discriminator,
                                          n_channels=n_channels,
                                          use_noise=use_noise,
                                          noise_sigma=noise_sigma)

video_discriminator = build_discriminator(video_discriminator,
                                          dim_categorical=dim_z_category,
                                          n_channels=n_channels,
                                          use_noise=use_noise,
                                          noise_sigma=noise_sigma)

if torch.cuda.is_available():
    generator.cuda()
    image_discriminator.cuda()
    video_discriminator.cuda()

trainer = Trainer(image_loader,
                  video_loader,
                  image_loader,
                  video_loader,
                  print_every,
                  batches,
                  log_folder,
                  use_cuda=torch.cuda.is_available(),
                  use_infogan=use_infogan,
                  use_categories=use_categories)

trainer.train(generator, image_discriminator, video_discriminator)
Esempio n. 35
0
File: main.py Progetto: flios/VDCNN
                                shuffle=not args.get('sort_dataset'),
                                num_workers=args.get('num_workers'))
# test_dataset = TextDataset(test_data, dictionary, args.get('sort_dataset'), args.get('min_length'), args.get('max_length'))
# test_dataloader = TextDataLoader(dataset=test_dataset, dictionary=dictionary, batch_size=args.get('batch_size'), shuffle = not args.get('sort_dataset'))

logger.info("Training...")
trainable_params = [p for p in model.parameters() if p.requires_grad]
if args.get('optimizer') == 'Adam':
    optimizer = Adam(model.parameters(), lr=args.get('initial_lr'))
elif args.get('optimizer') == 'Adadelta':
    optimizer = Adadelta(params=trainable_params,
                         lr=args.get('initial_lr'),
                         weight_decay=0.95)
else:
    raise NotImplementedError()

lr_plateau = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5)
criterion = nn.CrossEntropyLoss()
trainer = Trainer(model,
                  train_dataloader,
                  val_dataloader,
                  criterion=criterion,
                  optimizer=optimizer,
                  lr_schedule=args.get('lr_schedule'),
                  lr_scheduler=lr_plateau,
                  use_gpu=args.get('use_gpu'),
                  logger=logger)
trainer.run(epochs=args.get('epochs'))
logger.info("Evaluating...")
logger.info('Best Model: {}'.format(trainer.best_checkpoint_filepath))