Beispiel #1
0
def run(config):
    """Train and evaluate a ConvModel as described by ``config``.

    The configuration is printed and validated, NumPy and the stdlib RNG are
    seeded from ``config["random_seed"]``, and the dataset is built from the
    whole config. When both ``config["kfcv"]`` and ``config["kfcv_serial"]``
    equal 1, one fresh network is built and run per fold (``config["folds"]``
    folds in total); otherwise a single network is built and run.
    """
    print(config)
    checkConfigParams(config)

    seed = config["random_seed"]
    np.random.seed(seed)
    random.seed(seed)

    dataset = Dataset(**config)

    # "kfcv_serial" is only looked up when k-fold cross-validation is enabled.
    serial_folds = config["kfcv"] == 1 and config["kfcv_serial"] == 1

    if not serial_folds:
        # Single run. Preconditions: with kfcv == 1 the dataset has generated
        # a pool of data and its folds; otherwise it has generated distinct
        # training and testing sets.
        single_net = ConvModel(dataset, **config)
        runNet(single_net, config)
        return

    # Serial k-fold run. Preconditions: the dataset has generated a pool of
    # data, and the folds have been generated.
    # (Debug aid from the original author: dataset.kfcvPrintFoldInfo() can be
    # called here to validate that the folds work.)
    for fold in range(config["folds"]):
        fold_net = ConvModel(dataset, **config)
        gc.collect()
        runNet(fold_net, config, fold)
Beispiel #2
0
def main():
    """Run utterance inference with a trained model into a new output folder.

    Parses the command line, creates ``args.output_folder`` (failing if it
    already exists), pickles the parsed arguments into it, builds the dataset
    and loader, instantiates the model named by ``args.model``, restores its
    weights from ``args.model_checkpoint`` and hands everything to
    ``infer_utterance``.

    Raises:
        Exception: If the output folder already exists.
        ValueError: If ``args.model`` is not one of the supported model names.
    """
    args = parser.parse_args()
    out_dir = args.output_folder

    # Never write into the folder of an earlier experiment.
    if os.path.isdir(out_dir):
        raise Exception("Experiment name " + out_dir + " already exists.")
    os.mkdir(out_dir)

    # Keep the exact arguments next to the results.
    with open(out_dir + "/args.pckl", "wb") as args_file:
        pickle.dump(args, args_file)

    transform = NormalizeFixedFactor(1280) if args.normalize else None
    # utterance_dict = build_dataset_structure(args.utterance_folder)
    # metadata_structure = [utterance_dict]

    model_name = args.model

    # Model names containing "Text" use the text-conditioned dataset.
    if "Text" in model_name:
        dataset = FastTextPoseDataset(args.data,
                                      args.max_frames,
                                      transform,
                                      use_rand_tokens=args.rand_tokens)
    else:
        dataset = FastPoseDataset(args.data, args.max_frames, transform)

    loader = DataLoader(dataset, batch_size=1, collate_fn=collate_function)

    # One zero-argument factory per supported model name; only the selected
    # one is ever called.
    factories = {
        "Conv": lambda: ConvModel(args.conv_channels,
                                  activation="ReLU",
                                  pos_emb=args.conv_pos_emb),
        "TransformerEncoder": lambda: TransformerEncoder(args, 100),
        "ConvTransformerEncoder": lambda: ConvTransformerEncoder(args, 21 * 2),
        "TransformerEnc": lambda: TransformerEnc(ninp=12 * 2,
                                                 nhead=4,
                                                 nhid=100,
                                                 nout=21 * 2,
                                                 nlayers=4,
                                                 dropout=0.0),
        "TextPoseTransformer": lambda: TextPoseTransformer(
            n_tokens=1000,
            n_joints=12,
            joints_dim=2,
            nhead=4,
            nhid=128,
            nout=21 * 2,
            n_enc_layers=4,
            n_dec_layers=4,
            dropout=args.transformer_dropout),
    }
    if model_name not in factories:
        raise ValueError()
    model = factories[model_name]()

    checkpoint = torch.load(args.model_checkpoint)
    model.load_state_dict(checkpoint)

    infer_utterance(model, loader, args)
Beispiel #3
0
def run(args, use_cuda, output_dir):
    """Run all trials in random order, sweeping the lambda values in each.

    For every trial a model of type ``args.network_type`` is built, its
    parameters are reset and saved to
    ``<output_dir>/trial_<i>/initial_weights.pt``. Each
    ``(lambda, learning_rate)`` pair taken from ``args.lambda_values`` and
    ``args.learning_rates`` is then run through ``do_lambda_value``, always
    starting again from those saved initial weights.

    Args:
        args: Parsed options; read for ``n_trials``, ``network_type``, the
            data-loading options, the per-architecture hyper-parameters,
            ``lambda_values`` and ``learning_rates``.
        use_cuda: Truthy to place the model on ``"cuda"``, else ``"cpu"``.
        output_dir: Directory under which the per-trial folders are created.

    Raises:
        ValueError: If ``args.network_type`` is not 'fc', 'conv' or
            'densenet'. Checked before any trial starts, so no directory is
            created and no data is loaded for an invalid configuration.
    """
    # One builder per supported architecture; only the selected one is called.
    model_builders = {
        'fc': lambda params: DenseModel(
            input_dim=np.prod(params['input_shape']),
            output_dim=params['output_dim'],
            hidden_nodes=args.hidden_nodes,
            num_modules=args.n_modules,
            activation=args.activation),
        'conv': lambda params: ConvModel(
            input_shape=params['input_shape'],
            output_dim=params['output_dim'],
            num_filters=args.filters,
            kernel_sizes=args.kernels,
            strides=args.strides,
            dilations=args.dilations,
            num_modules=args.n_modules,
            activation=args.activation,
            final_layer=args.conv_final_layer),
        'densenet': lambda params: DenseNet(
            input_shape=params['input_shape'],
            output_dim=params['output_dim'],
            growth_rate=args.densenet_k,
            depth=args.densenet_depth,
            reduction=args.densenet_reduction,
            bottleneck=args.densenet_bottleneck,
            num_modules=args.n_modules),
    }

    # Previously an unknown type left ``model`` unbound (NameError on the
    # first trial) or silently reused the model of the previous trial.
    build_model = model_builders.get(args.network_type)
    if build_model is None:
        raise ValueError(
            'Unknown network_type {!r}; expected one of {}'.format(
                args.network_type, sorted(model_builders)))

    # The target device does not change between trials.
    device = torch.device("cuda" if use_cuda else "cpu")

    trial_list = list(range(args.n_trials))
    np.random.shuffle(trial_list)

    for trial_i in trial_list:
        trial_dir = os.path.join(output_dir, 'trial_{}'.format(trial_i))
        os.makedirs(trial_dir, exist_ok=True)

        loaders, params = get_dataloaders(args.batch_size,
                                          trial_i,
                                          args.dataset,
                                          args.augment_data,
                                          early_stop=args.early_stop)

        model = build_model(params)

        logging.debug(args)
        logging.debug('Parameters: {}'.format(model.n_parameters()))

        model = model.to(device)
        model.reset_parameters()

        # Snapshot the initial weights so every lambda value starts from the
        # same initialisation.
        weight_path = os.path.join(trial_dir, 'initial_weights.pt')
        torch.save(model.state_dict(), weight_path)

        for lambda_, learning_rate in zip(args.lambda_values,
                                          args.learning_rates):
            model.load_state_dict(torch.load(weight_path))

            lambda_dir = os.path.join(trial_dir, str(lambda_))
            os.makedirs(lambda_dir, exist_ok=True)

            do_lambda_value(model, lambda_, learning_rate, args, loaders,
                            params['distribution'], device, lambda_dir)
def main():
    """Run utterance inference with a trained model on an HDF5 dataset.

    Parses the command line, refuses to continue if ``args.out_h5data``
    already exists, assembles the transform pipeline selected by
    ``args.dif_encoding``, ``args.normalize`` and ``args.predict``, builds the
    dataset and loader, instantiates the model named by ``args.model``,
    restores its weights from ``args.model_checkpoint`` and calls
    ``infer_utterance_h5``.

    Raises:
        Exception: If the output file already exists.
        ValueError: If ``args.predict`` or ``args.model`` is not supported.
    """
    args = parser.parse_args()

    # Never overwrite the output of an earlier experiment.
    if os.path.isfile(args.out_h5data):
        raise Exception("Experiment name " + args.out_h5data + " already exists.")

    pipeline = []
    # TODO Encode body points also differentially to some joint not only hand wrt wrist
    if args.dif_encoding:
        pipeline.extend([WristDifference(), ChestDifference()])
    # TODO Change Normalization scheme to fixed bone dist
    if args.normalize:
        pipeline.append(NormalizeFixedFactor(1280))

    # The prediction target fixes the joint counts later passed to
    # TextPoseTransformer and the final item-building transform.
    target = args.predict
    if target == "right_index":
        n_input, n_output = 12 + 17, 4
        pipeline.append(BuildIndexItem())
    elif target == "right_3fingers":
        n_input, n_output = 12 + 9, 12
        pipeline.append(Build3fingerItem())
    elif target == "right_hand":
        # n_input = 12
        n_input, n_output = 8, 21
        pipeline.append(BuildRightHandItem())
    else:
        raise ValueError()

    composed = torchvision.transforms.Compose(pipeline)

    model_name = args.model

    # Model names containing "Text" use the text-conditioned HDF5 dataset.
    if "Text" in model_name:
        dataset = TextPoseH5Dataset(args.valid_h5data,
                                    args.valid_textdata,
                                    args.max_frames,
                                    composed,
                                    selection=args.frames_selection,
                                    use_rand_tokens=args.rand_tokens)
    else:
        dataset = FastPoseDataset(args.data, args.max_frames, composed)

    loader = DataLoader(dataset, batch_size=128, collate_fn=collate_function_h5)

    # One zero-argument factory per supported model name; only the selected
    # one is ever called.
    factories = {
        "Conv": lambda: ConvModel(args.conv_channels, "ReLU",
                                  pos_emb=args.conv_pos_emb),
        "ConvTransformerEncoder": lambda: ConvTransformerEncoder(args, 21 * 2),
        "TransformerEnc": lambda: TransformerEnc(
            ninp=12 * 2, nhead=4, nhid=128, nout=21 * 2,
            nlayers=4, dropout=args.transformer_dropout),
        "TextPoseTransformer": lambda: TextPoseTransformer(
            n_tokens=1000, n_joints=n_input, joints_dim=2, nhead=4,
            nhid=128, nout=n_output * 2, n_enc_layers=4, n_dec_layers=4,
            dropout=args.transformer_dropout),
    }
    if model_name not in factories:
        raise ValueError()
    model = factories[model_name]()

    checkpoint = torch.load(args.model_checkpoint)
    model.load_state_dict(checkpoint)

    infer_utterance_h5(model, loader, args)
Beispiel #5
0
def create_model_and_optimizer(args, dataset, params=None):
    """Create the model for ``dataset`` and an SGD optimizer for it.

    Args:
        args: Options namespace; read for ``model`` ("LSTM" or "CNN"), the
            three embedding sizes, the model-specific hidden size,
            ``dropout``, ``uniform_init``, ``cuda`` and ``initial_lr``
            (plus ``num_filters`` for the CNN).
        dataset: Object whose ``vocab`` maps "word", "ent", "num" and "label"
            to dictionaries containing a ``"w2i"`` mapping.
        params: Optional state dict. When given it is loaded into the model;
            otherwise every parameter is drawn uniformly from
            ``[-args.uniform_init, args.uniform_init]`` and
            ``model.embed.pad_init()`` is called afterwards.

    Returns:
        A ``(model, optimizer)`` tuple.

    Raises:
        ValueError: If ``args.model`` is neither "LSTM" nor "CNN".
    """
    feature_keys = ["word", "ent", "num"]

    vocab_sizes = [len(dataset.vocab[k]["w2i"]) for k in feature_keys]
    embed_sizes = [
        args.word_embed_size,
        args.entdist_embed_size,
        args.numdist_embed_size,
    ]
    pads = [dataset.vocab[k]["w2i"]["PAD"] for k in feature_keys]

    if args.model == "LSTM":
        model = BLSTMModel(
            vocab_sizes,
            embed_sizes,
            sum(embed_sizes),
            args.blstm_fc_hidden_dim,
            len(dataset.vocab["label"]["w2i"]),
            pads,
            args.dropout,
        )
    elif args.model == "CNN":
        model = ConvModel(
            vocab_sizes,
            embed_sizes,
            sum(embed_sizes),
            args.conv_fc_hidden_dim,
            len(dataset.vocab["label"]["w2i"]),
            pads,
            args.num_filters,
            args.dropout,
        )
    else:
        # Without this branch ``model`` stayed unbound and the function
        # failed later with an unrelated UnboundLocalError.
        raise ValueError(
            "Unsupported model type {!r}; expected 'LSTM' or 'CNN'".format(
                args.model))

    if params is not None:
        model.load_state_dict(params)
    else:
        # initialize all the model weights
        for p in model.parameters():
            torch.nn.init.uniform_(p, -args.uniform_init, args.uniform_init)

        # Make sure that pad vectors are zero
        model.embed.pad_init()

    # Move the model first so the optimizer is built over the moved model.
    if args.cuda:
        model = model.to("cuda")

    optimizer = torch.optim.SGD(model.parameters(), lr=args.initial_lr)

    return model, optimizer
Beispiel #6
0
    # Training and validation sets are built the same way: same frame limit
    # and the same transform, only the data source differs.
    train_dataset = FastPoseDataset(args.train_data, args.max_frames,
                                    transform)
    valid_dataset = FastPoseDataset(args.valid_data, args.max_frames,
                                    transform)

    # Both loaders share batch size and collate function; neither one
    # requests shuffling.
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  collate_fn=collate_function)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=args.batch_size,
                                  collate_fn=collate_function)

    # Instantiate the architecture named by args.model.
    if args.model == "Conv":
        model = ConvModel(args.conv_channels,
                          "ReLU",
                          pos_emb=args.conv_pos_emb)
    elif args.model == "TransformerEncoder":
        model = TransformerEncoder(args, 100)
    elif args.model == "ConvTransformerEncoder":
        model = ConvTransformerEncoder(args, 21 * 2)
    elif args.model == "TransformerEnc":
        model = TransformerEnc(ninp=12 * 2,
                               nhead=4,
                               nhid=100,
                               nout=21 * 2,
                               nlayers=4,
                               dropout=0.0)
    else:
        # Any other model name is rejected (the exception carries no message).
        raise ValueError()
    print(args.resume)
Beispiel #7
0
 def create_model(self):
     """Build a ConvModel sized from the training set's channel counts.

     The dropout rate is taken from ``self.helper.opt.cnn.dropout``.
     """
     n_in = self.train_set.in_channels()
     n_out = self.train_set.out_channels()
     dropout_rate = self.helper.opt.cnn.dropout
     return ConvModel(n_in, n_out, dropout=dropout_rate)
Beispiel #8
0
import sys
import pandas as pd
import torch
from torch.utils.data import DataLoader

from datasets import KPIDataset
from models import ConvModel
from util import print_progress_bar

# Evaluation dataset built from the preprocessed test CSV.
dataset = KPIDataset('../data/test_preprocessed.csv',
                     seq_length=1001,
                     step_width=1,
                     evaluate=True)

# Restore the trained network and move it to the GPU.
model = ConvModel(1001)
model.load_state_dict(torch.load('./state'))
model = model.cuda()
# Predictions must be made in eval mode: layers such as dropout or batch
# normalisation behave differently while the module is in training mode.
model.eval()

# Keep the dataset order so predictions line up with the input samples.
loader = DataLoader(dataset, batch_size=256, shuffle=False)

iter_per_epoch = len(loader)
result = []

# No gradients are needed for inference.
with torch.no_grad():
    for i, x in enumerate(loader):
        x = x.cuda()
        out = model(x).cpu().numpy()
        # Highest-scoring index along axis 1 for every sample in the batch.
        result.extend(out.argmax(1))
        print_progress_bar(i, iter_per_epoch)

df = pd.read_csv('../data/test_preprocessed.csv')
Beispiel #9
0
    def __init__(self, gpu, mode, args):
        """Set up model, data and optimisation state for one process, then run.

        Args:
            gpu: Index of this process. Used as the distributed rank and,
                unless ``args.on_cpu`` is set, as the CUDA device index.
            mode: Dataset split to load. ``'train'`` additionally creates the
                optimizer, the optional warm-up scheduler, an optional 'dev'
                dataset for evaluation and (on rank 0) a ``SummaryWriter``.
            args: Run configuration namespace.

        The constructor finishes by calling ``self.run()``.
        """
        self.rank = gpu
        self.mode = mode
        self.args = args

        self.lr = getattr(self.args, 'lr', 1e-5)
        self.l2 = getattr(self.args, 'l2', 0.0001)
        self.model_type = getattr(self.args, 'model_type', 'ConvRNN')

        is_main = self.rank == 0
        is_cnn = self.model_type == 'CNN'
        is_training = self.mode == 'train'

        if self.args.on_cpu:
            self.device = torch.device('cpu')
        else:
            self.device = torch.device('cuda:{}'.format(self.rank))
        # NOTE(review): the NCCL process group is initialised even when
        # ``on_cpu`` is set -- confirm that CPU runs are expected to work.
        dist.init_process_group('nccl',
                                world_size=len(self.args.gpus),
                                rank=self.rank)

        torch.manual_seed(self.args.seed)

        if is_main:
            print('Creating model...')

        if is_cnn:
            self.model = ConvModel(self.args, self.device).to(self.device)
        else:
            self.model = ConvRNNModel(self.args, self.device).to(self.device)

        if not self.args.on_cpu:
            self.model = torch.nn.parallel.DistributedDataParallel(self.model, device_ids=[self.rank],
                                                                   find_unused_parameters=False)

        ckpt_file = None
        if not is_training:
            # ckpt_file = self.args.out_dir
            if self.args.train:
                # Training ran in this invocation: start at the checkpoint of
                # the last epoch and walk backwards until one exists on disk.
                ckpt_epoch_offset = 1
                ckpt_file = os.path.join(self.args.model_save_dir,
                                         self.args.ckpt_file_tmplt.format(self.args.epochs - ckpt_epoch_offset))
                while not os.path.exists(ckpt_file) and self.args.epochs - ckpt_epoch_offset >= 0:
                    ckpt_epoch_offset += 1
                    ckpt_file = os.path.join(self.args.model_save_dir,
                                             self.args.ckpt_file_tmplt.format(self.args.epochs - ckpt_epoch_offset))

            else:
                ckpt_file = self.args.ckpt_file

        if ckpt_file is not None:
            if is_main:
                print('Loading model from checkpoint...')
            # Load every tensor straight onto this process's device. The
            # previous dict comprehension reused the single key 'cuda:0', so
            # it always mapped to the *last* entry of ``args.gpus`` instead of
            # this rank's device, and could not load on CPU.
            state_dict = torch.load(ckpt_file, map_location=self.device)
            self.model.load_state_dict(state_dict)  # , strict=False

        if is_main:
            print('Loading dataset...')

        dataset_start_time = time.time()

        # Frame undersampling only applies to the frame-based (CNN) datasets;
        # a probability of 1.0 means "no undersampling" and is passed as None.
        frame_undersample_p = getattr(self.args, 'frame_undersample_p', 1.0)
        if frame_undersample_p == 1.0:
            frame_undersample_p = None

        if is_cnn:
            self.dataset = PhysicsFramesDataset(self.mode, self.args.data, self.args, undersample=frame_undersample_p)
        else:
            self.dataset = PhysicsDataset(self.mode, self.args.data, self.args)

        if is_main:
            print('Time to read dataset: {0:.2f}s'.format(time.time() - dataset_start_time))

        if getattr(self.args, 'weight_xent_loss', False):
            self.label_weights = self.dataset.label_weights
        else:
            self.label_weights = [1.0, 1.0]
        if is_main:
            print('Setting label weights to {}...'.format(self.label_weights))
        # DistributedDataParallel exposes the wrapped network as ``.module``;
        # on CPU the model is not wrapped, so use it directly instead of
        # failing with AttributeError.
        if isinstance(self.model, torch.nn.parallel.DistributedDataParallel):
            inner_model = self.model.module
        else:
            inner_model = self.model
        inner_model.set_label_weights(self.label_weights)

        if self.args.on_cpu:
            data_sampler = None
        else:
            # Shuffling is delegated to the sampler, hence shuffle=False on
            # the DataLoader below.
            data_sampler = torch.utils.data.distributed.DistributedSampler(self.dataset,
                                                                           num_replicas=args.world_size,
                                                                           rank=self.rank,
                                                                           shuffle=is_training)

        self.data_loader = DataLoader(self.dataset, batch_size=self.args.batch_size, shuffle=False,
                                      num_workers=self.args.n_data_workers, pin_memory=True, sampler=data_sampler)
        # Iterations per epoch for one process: the dataset is split across
        # all configured GPUs.
        self.n_iters = int(math.ceil(len(self.dataset) / (self.args.batch_size * len(self.args.gpus))))

        self.aux_dataset = None
        self.aux_data_loader = None
        self.aux_n_iters = None

        if is_training and (self.args.eval or self.args.eval_every > 0):
            if is_main:
                print('Loading aux dataset...')

            if is_cnn:
                self.aux_dataset = PhysicsFramesDataset('dev', self.args.data, self.args,
                                                        force_unbalanced=True,
                                                        undersample=frame_undersample_p)
            else:
                self.aux_dataset = PhysicsDataset('dev', self.args.data, self.args, force_unbalanced=True,
                                                  disallow_supersample=True)

            if self.args.on_cpu:
                aux_data_sampler = None
            else:
                aux_data_sampler = torch.utils.data.distributed.DistributedSampler(self.aux_dataset,
                                                                                   num_replicas=args.world_size,
                                                                                   rank=self.rank)
            self.aux_data_loader = DataLoader(self.aux_dataset, batch_size=self.args.batch_size, shuffle=False,
                                              num_workers=self.args.n_data_workers, pin_memory=True,
                                              sampler=aux_data_sampler)
            self.aux_n_iters = int(math.ceil(len(self.aux_dataset) / (self.args.batch_size * len(self.args.gpus))))

        # Only rank 0 writes summaries, and only while training.
        self.summary_writer = None
        if is_main and is_training:
            self.summary_writer = SummaryWriter(log_dir=self.args.tb_dir)

        self.n_epochs = 1
        if is_training:
            self.n_epochs = self.args.epochs

            # Optimise only the parameters that require gradients.
            opt_parms = filter(lambda p: p.requires_grad, self.model.parameters())
            self.optimizer = optim.Adam(opt_parms, lr=self.lr, weight_decay=self.l2)
            n_total_iters = self.n_iters * self.n_epochs
            n_warmup_steps = self.args.warmup_proportion * n_total_iters
            if n_warmup_steps > 0:  # get_linear_schedule_with_warmup
                self.scheduler = get_cosine_schedule_with_warmup(self.optimizer,
                                                                 num_warmup_steps=n_warmup_steps,
                                                                 num_training_steps=n_total_iters)
                # self.scheduler = get_linear_schedule_with_warmup(self.optimizer,
                #                                                  num_warmup_steps=n_warmup_steps,
                #                                                  num_training_steps=n_total_iters)
            else:
                self.scheduler = None

        self.run()
Beispiel #10
0
def main(name, test=False, chkpt=None, device="cuda"):
    """Train a DQN agent on Breakout-v0, or just play it when ``test`` is set.

    Args:
        name: Run name passed to ``wandb.init`` (not used when ``test``).
        test: When true, render the environment, act greedily (eps = 0) and
            never train or log.
        chkpt: Optional path to a state dict loaded into the online model.
        device: Torch device for both models and for inference inputs.

    The loop runs until interrupted with Ctrl-C; both environments are closed
    on the way out.
    """
    if not test:
        wandb.init(project="dqn-tutorial", name=name)
    do_boltzman_exploration = False
    # NOTE(review): memory_size is not passed to ReplayBuffer() below, so the
    # buffer uses its own default size -- confirm this is intended.
    memory_size = 1000000
    min_rb_size = 50000
    sample_size = 32
    lr = 0.0001

    # eps_max = 1.0
    eps_min = 0.1

    eps_decay = 0.999999

    env_steps_before_train = 16
    tgt_model_update = 5000
    epochs_before_test = 1500

    env = gym.make("Breakout-v0")
    env = FrameStackingAndResizingEnv(env, 84, 84, 4)

    test_env = gym.make("Breakout-v0")
    test_env = FrameStackingAndResizingEnv(test_env, 84, 84, 4)

    last_observation = env.reset()

    m = ConvModel(env.observation_space.shape, env.action_space.n, lr=lr).to(device)
    if chkpt is not None:
        m.load_state_dict(torch.load(chkpt))
    tgt = ConvModel(env.observation_space.shape, env.action_space.n).to(device)
    update_tgt_model(m, tgt)

    rb = ReplayBuffer()
    steps_since_train = 0
    epochs_since_tgt = 0
    epochs_since_test = 0

    # Start negative so the step counter reaches 0 once min_rb_size
    # environment steps have been taken.
    step_num = -1 * min_rb_size

    episode_rewards = []
    rolling_reward = 0

    tq = tqdm()
    try:
        while True:
            if test:
                env.render()
                time.sleep(0.05)
            tq.update(1)

            if test:
                eps = 0
            else:
                # Exponential decay, floored at eps_min (the floor was
                # defined but never applied before).
                eps = max(eps_min, eps_decay ** step_num)

            if do_boltzman_exploration:
                logits = m(torch.Tensor(last_observation).unsqueeze(0).to(device))[0]
                action = torch.distributions.Categorical(logits=logits).sample().item()
            else:
                if random() < eps:
                    # Explore: take a random action.
                    action = env.action_space.sample()
                else:
                    # Exploit: index of the largest model output.
                    action = m(torch.Tensor(last_observation).unsqueeze(0).to(device)).max(-1)[-1].item()

            observation, reward, done, info = env.step(action)
            rolling_reward += reward

            rb.insert(Sarsd(last_observation, action, reward, observation, done))

            last_observation = observation

            if done:
                episode_rewards.append(rolling_reward)
                if test:
                    print(rolling_reward)
                rolling_reward = 0
                # The next action must be chosen from the first frame of the
                # new episode. Previously the reset result was stored in
                # ``observation`` only, leaving ``last_observation`` at the
                # terminal frame and corrupting the next stored transition.
                last_observation = env.reset()

            steps_since_train += 1
            step_num += 1

            if (
                (not test)
                and rb.idx > min_rb_size
                and steps_since_train > env_steps_before_train
            ):
                loss = train_step(
                    m, rb.sample(sample_size), tgt, env.action_space.n, device
                )
                wandb.log(
                    {
                        "loss": loss.detach().cpu().item(),
                        "eps": eps,
                        "avg_reward": np.mean(episode_rewards),
                    },
                    step=step_num,
                )
                episode_rewards = []
                epochs_since_tgt += 1
                epochs_since_test += 1

                if epochs_since_test > epochs_before_test:
                    rew, frames = run_test_episode(m, test_env, device)
                    # T, H, W, C
                    wandb.log({'test_reward': rew, 'test_video': wandb.Video(frames.transpose(0, 3, 1, 2), str(rew), fps=25, format='mp4')})
                    epochs_since_test = 0

                if epochs_since_tgt > tgt_model_update:
                    print("updating target model")
                    update_tgt_model(m, tgt)
                    epochs_since_tgt = 0
                    torch.save(tgt.state_dict(), f"models/{step_num}.pth")

                steps_since_train = 0

    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the endless loop.
        pass
    finally:
        env.close()
        test_env.close()
def main(name=None, chkpt=None, test_run=False, local_run=False):
    """Train a DQN agent on BreakoutDeterministic-v4, or just play it.

    Sync to wandb cloud as standard, but sync locally if local_run and not at
    all if test_run.

    Args:
        name: Run name for wandb; asked for interactively when ``None``
            (only when not a test run).
        chkpt: Optional file name inside the ``Models`` directory next to
            this file, loaded into the online model.
        test_run: When true, render the environment, act with eps = 0 and
            never train or log.
        local_run: When true, set ``WANDB_MODE=dryrun`` and skip the
            ``wandb.log`` calls.

    The loop runs until interrupted with Ctrl-C; both environments are closed
    on the way out.
    """
    if not test_run:
        if local_run:
            os.environ["WANDB_MODE"] = "dryrun"
        if name is None:
            name = input("Name the run: ")
        wandb.init(project="atari-breakout", name=name, config={
            'memory_size': memory_size,
            'min_rb_size': min_rb_size,
            'sample_size': sample_size,
            'lr': lr,
            'eps_min': eps_min,
            'eps_decay': eps_decay,
            'discount_factor': discount_factor,
            'env_steps_before_train': env_steps_before_train,
            'epochs_before_tgt_model_update': epochs_before_tgt_model_update,
            'epochs_before_test': epochs_before_test,
            'episode_max_steps': episode_max_steps,
            'optimizer_function': optimizer_function.__name__,
            'exploration_method': exploration_method.__name__,
            'env_type': env_type.__name__
        })

    # Create environments and reset
    env = env_type(gym.make("BreakoutDeterministic-v4"), 84, 84, 4)
    test_env = env_type(gym.make("BreakoutDeterministic-v4"), 84, 84, 4)
    last_observation = env.reset()

    # Set the model and target model
    m = ConvModel(env.observation_space.shape, env.action_space.n, lr=lr)
    if chkpt is not None:
        m.load_state_dict(torch.load(os.path.join(os.path.dirname(__file__), f"Models/{chkpt}")))
    target = ConvModel(env.observation_space.shape, env.action_space.n)
    update_target_model(m, target)

    # Create replay buffer and other variables
    rb = ReplayBuffer(memory_size)
    steps_since_train = 0
    epochs_since_tgt = 0
    epochs_since_test = 0
    step_num = -1 * min_rb_size  # Want to run the iteration for min_rb_size before starting to actually learn
    episode_rewards = []
    total_reward = 0

    tq = tqdm()
    try:
        while True:
            if test_run:
                env.render()
                time.sleep(0.05)
            tq.update(1)

            # Updating epsilon
            eps = eps_decay ** (step_num)
            if test_run:
                eps = 0
            elif eps < eps_min:
                eps = eps_min

            # Exploration vs exploitation, Boltzmann with eps_decay vs.
            # Epsilon Greedy (defined in constants.py)
            action = exploration_method(model=m, env=env, last_observation=last_observation, eps=eps)

            # Perform step and insert observation to replay buffer
            observation, reward, done, _ = env.step(action)
            total_reward += reward
            rb.insert(GameInformation(last_observation, action, reward, observation, done))
            last_observation = observation

            # Reset and append total_reward to episode_rewards if done
            if done:
                episode_rewards.append(total_reward)
                if test_run:
                    print(total_reward)
                total_reward = 0
                # The next action must be chosen from the first frame of the
                # new episode. Previously the reset result was stored in
                # ``observation`` only, leaving ``last_observation`` at the
                # terminal frame and corrupting the next stored transition.
                last_observation = env.reset()

            # Train if ran enough steps since last training
            steps_since_train += 1
            step_num += 1
            if ((not test_run) and rb.i > min_rb_size and steps_since_train > env_steps_before_train):
                loss = train_step(m, rb.sample(sample_size), target, env.action_space.n)
                if not local_run:
                    wandb.log(
                        {
                            "loss": loss.detach().item(),
                            "eps": eps,
                            "avg_reward": np.mean(episode_rewards),
                        },
                        step=step_num,
                    )
                episode_rewards = []
                epochs_since_tgt += 1
                epochs_since_test += 1

                # Run test_run episode
                if epochs_since_test > epochs_before_test:
                    rew, frames = run_test_episode(m, test_env)
                    if not local_run:
                        wandb.log({'test_reward': rew, 'test_video': wandb.Video(frames.transpose(0, 3, 1, 2), str(rew), fps=25, format='mp4')})
                    epochs_since_test = 0

                # Update target model
                if epochs_since_tgt > epochs_before_tgt_model_update:
                    print("updating target model")
                    update_target_model(m, target)
                    epochs_since_tgt = 0
                    torch.save(target.state_dict(), os.path.join(os.path.dirname(__file__), f"Models/{step_num}.pth"))

                steps_since_train = 0

    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the endless loop.
        pass
    finally:
        env.close()
        test_env.close()
Beispiel #12
0
import sys
from torch.nn import CrossEntropyLoss

from util import Trainer
from datasets import KPIDataset
from models import ConvModel


# Training dataset built from the preprocessed training CSV.
dataset = KPIDataset('../data/train_preprocessed.csv', seq_length=1001, step_width=1)

model = ConvModel(1001)

# Optimizer keyword arguments, handed to the Trainer as ``optim_args``.
args = dict(
    lr=0.5e-4,
    betas=(0.9, 0.999),
    eps=1e-8,
    weight_decay=0.0,
)

trainer = Trainer(
    model,
    dataset,
    batch_size=512,
    epochs=100,
    log_nth=800,
    validation_size=0.2,
    optim_args=args,
Beispiel #13
0
# Training batches: reshuffled each epoch, incomplete last batch dropped.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

# Map for labels, so that we can visualize the results better
label_map = {
    0: 'Apu', 1: 'Bart', 2: 'Mr. Burns', 3: 'Chief Wiggum', 4: 'Edna', 5: 'Grandpa',
    6: 'Homer', 7: 'Krusty', 8: 'Lisa', 9: 'Marge', 10: 'Milhouse', 11: 'Moe',
    12: 'Flanders', 13: 'Nelson', 14: 'Patty', 15: 'Skinner', 16: 'Selma', 17: 'Smithers'
}

# Load test dataset
test_dataset = torchvision.datasets.ImageFolder('./data/test', transform=transform)
# Evaluate on every test image in a fixed order. With drop_last=True the
# final incomplete batch was silently excluded from accuracy and loss, and
# shuffling serves no purpose for evaluation.
validation_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

from models import ConvModel

# 3 input channels, one output class per entry of label_map.
model = ConvModel(channels=3, num_classes=18).to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)

print(model)

trainer = create_supervised_trainer(model, optimizer, criterion, device=device)

# Metrics computed by the evaluator.
metrics = {
    "accuracy": Accuracy(),
    "loss": Loss(criterion)
}
evaluator = create_supervised_evaluator(model, metrics, device=device)

@trainer.on(Events.ITERATION_COMPLETED(every=10))
def log_training_loss(engine):
Beispiel #14
0
def _make_dqn_env(env_name):
    """Create the gym environment ``env_name`` with the step cap used by dqnmain.

    'Breakout-v0' is additionally wrapped in ``FrameStackingAndResizingEnv``
    with the arguments (84, 84, 4).
    """
    env = gym.make(env_name)
    if env_name == 'Breakout-v0':
        # TODO: change stack size here (last argument)
        env = FrameStackingAndResizingEnv(env, 84, 84, 4)
    env._max_episode_steps = 4000
    return env


def _choose_action(model, observation, env, eps, is_breakout,
                   do_boltzman_exploration, device):
    """Return the next action for ``observation``.

    With Boltzmann exploration the action is sampled from a Categorical
    distribution whose logits are ``model(obs)[0]``.  Otherwise the policy is
    epsilon-greedy: a random action with probability ``eps``, else the argmax
    of the model output over the last axis.
    """
    if not do_boltzman_exploration and random.random() < eps:
        return env.action_space.sample()

    obs = torch.Tensor(observation)
    if is_breakout:
        # Only the Breakout path adds a leading axis before calling the model.
        obs = obs.unsqueeze(0)
    output = model(obs.to(device))

    if do_boltzman_exploration:
        return torch.distributions.Categorical(
            logits=output[0]).sample().item()
    return output.max(-1)[-1].item()


def dqnmain(project_name,
            do_boltzman_exploration=False,
            test=False,
            chkpt=None,
            hypeparams=hyperparameter_defaults,
            steps=1000,
            device='cuda'):
    """Run the DQN loop on ``hypeparams['env_name']``, training or testing.

    In training mode (``test=False``) a wandb run is started, hyperparameters
    are read from its config, transitions are stored in a replay buffer and
    ``train_step`` is called periodically; the target model is refreshed,
    evaluated with ``run_test_episode`` and saved into the wandb run
    directory every ``tgt_model_update`` training steps.  The loop ends when
    the mean reward of the episodes finished since the previous training
    step reaches ``max_reward`` or when ``max_steps`` is exceeded.

    In test mode (``test=True``) no wandb run is created and no training
    happens: the agent acts greedily (epsilon 0, unless Boltzmann exploration
    is requested), each frame is rendered, and episode rewards are printed.

    Args:
        project_name: wandb project name (used only when not testing).
        do_boltzman_exploration: sample actions from the model output instead
            of using epsilon-greedy selection.
        test: run in test mode as described above.
        chkpt: optional path of a state dict to load into the online model.
        hypeparams: mapping providing 'run_name', 'env_name', 'max_reward',
            'max_steps', 'memory_size', 'min_rb_size', 'sample_size',
            'env_steps_before_train', 'tgt_model_update', 'reward_scaler',
            'eps_min', 'eps_decay', 'gamma' and 'learning_rate'.
        steps: step limit used in test mode instead of 'max_steps'.
        device: torch device for the models and checkpoint loading.

    Raises:
        SystemExit: when interrupted with Ctrl-C (via ``sys.exit()``).
    """
    # Frames rendered in test mode; collected but not consumed in this function.
    image_arr = []

    if test:
        wdbrun = None
        config = None
    else:
        wdbrun = wandb.init(project=project_name,
                            config=hypeparams,
                            name=hypeparams['run_name'],
                            reinit=True,
                            monitor_gym=False)
        config = wdbrun.config

    def read_param(name):
        # Training reads the value from the wandb config (attribute access),
        # testing reads it straight from the hyperparameter mapping.
        return hypeparams[name] if test else getattr(config, name)

    max_steps = steps if test else read_param('max_steps')
    max_reward = read_param('max_reward')
    memory_size = read_param('memory_size')
    min_rb_size = read_param('min_rb_size')
    sample_size = read_param('sample_size')
    env_steps_before_train = read_param('env_steps_before_train')
    tgt_model_update = read_param('tgt_model_update')
    reward_scaler = read_param('reward_scaler')
    eps_min = read_param('eps_min')
    eps_decay = read_param('eps_decay')
    gamma = read_param('gamma')
    learning_rate = read_param('learning_rate')

    env_name = hypeparams['env_name']
    is_breakout = env_name == 'Breakout-v0'

    env = _make_dqn_env(env_name)
    test_env = _make_dqn_env(env_name)
    last_observation = env.reset()

    model_cls = ConvModel if is_breakout else Model
    m = model_cls(env.observation_space.shape, env.action_space.n,
                  learning_rate).to(device)
    if chkpt is not None:
        # map_location lets a checkpoint saved on another device be loaded.
        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        m.load_state_dict(torch.load(chkpt, map_location=device))

    # Target model; it is synchronised with ``m`` less often than ``m`` trains.
    tgt = model_cls(env.observation_space.shape,
                    env.action_space.n).to(device)
    update_tgt_model(m, tgt)

    rb = ReplayBuffer(memory_size)
    steps_since_train = 0
    epochs_since_tgt = 0

    # Starts negative, so epsilon (eps_decay ** step_num) stays above 1 and
    # every action is random for the first ``min_rb_size`` steps.
    step_num = -1 * min_rb_size

    episode_rewards = []
    rolling_reward = 0
    solved = False

    try:
        while (not solved) and step_num < max_steps:
            if test:
                image_arr.append(env.render('rgb_array'))
                eps = 0
            else:
                # Never let exploration decay below the configured floor.
                eps = max(eps_min, eps_decay**step_num)

            action = _choose_action(m, last_observation, env, eps,
                                    is_breakout, do_boltzman_exploration,
                                    device)

            observation, reward, done, _ = env.step(action)
            rolling_reward += reward

            reward = reward / reward_scaler

            rb.insert(SARS(last_observation, action, reward, done,
                           observation))

            last_observation = observation

            if done:
                episode_rewards.append(rolling_reward)
                if test:
                    print(rolling_reward)
                rolling_reward = 0
                # The next episode must start from the freshly reset state;
                # otherwise the next action and stored transition would use
                # the terminal observation of the episode that just ended.
                last_observation = env.reset()

            steps_since_train += 1
            step_num += 1

            ready_to_train = (not test and rb.idx > min_rb_size
                              and steps_since_train > env_steps_before_train)
            if not ready_to_train:
                continue

            loss = train_step(m, rb.sample(sample_size), tgt,
                              env.action_space.n, gamma, device)
            log_entry = {
                'loss': loss.detach().cpu().item(),
                'epsilon': eps,
            }
            # Only report an average when at least one episode finished since
            # the previous training step (np.mean of [] would be NaN).
            if episode_rewards:
                ave_reward = np.mean(episode_rewards)
                log_entry['avg_reward'] = ave_reward
                if ave_reward >= max_reward:
                    solved = True
            wdbrun.log(log_entry, step=step_num)

            episode_rewards = []
            epochs_since_tgt += 1
            if epochs_since_tgt > tgt_model_update:
                update_tgt_model(m, tgt)
                # Per the original note frames.shape == (T, H, W, C); the
                # frames are currently not logged.
                rew, frames = run_test_episode(m, test_env, device)
                wandb.log({'test_reward': rew})
                epochs_since_tgt = 0
                torch.save(
                    tgt.state_dict(),
                    f"{wandb.run.dir}/{hypeparams['run_name']}_{step_num}.pth"
                )
            steps_since_train = 0
        wandb.join()
    except KeyboardInterrupt:
        sys.exit()
    finally:
        # Release both environments on every exit path, including Ctrl-C.
        env.close()
        test_env.close()