Example #1
    def __init__(self, params):
        super(BCAgent, self).__init__(params)

        # Initialize policy network
        pol_params = self.params['p-bc']['pol_params']
        pol_params['input_size'] = self.N
        pol_params['output_size'] = self.M
        if 'final_activation' not in pol_params:
            pol_params['final_activation'] = torch.tanh

        self.pol = MLP(pol_params)

        # Create policy optimizer
        ppar = self.params['p-bc']['pol_optim']
        self.pol_optim = torch.optim.Adam(self.pol.parameters(),
                                          lr=ppar['lr'],
                                          weight_decay=ppar['reg'])

        # Use a replay buffer that will save planner actions
        self.pol_buf = ReplayBuffer(self.N, self.M,
                                    self.params['p-bc']['buf_size'])

        # Logging (store cum_rew, cum_emp_rew)
        self.hist['pols'] = np.zeros((self.T, 2))

        self.has_pol = True

        self.pol_cache = ()
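For reference, the constructor above consumes a nested params dictionary. A minimal sketch of its shape (the key names appear in this and the later BCAgent snippet; the concrete values and the MLP-specific keys are illustrative assumptions):

params = {
    'p-bc': {
        'pol_params': {'hidden_sizes': (64, 64)},  # hypothetical MLP layout; input/output sizes are set by the agent
        'pol_optim': {'lr': 1e-3, 'reg': 1e-4},
        'buf_size': 10000,
        # the full BCAgent snippet below also reads 'pb_pct', 'update_freq', 'batch_size', 'grad_steps'
    },
    # ...plus whatever the POLOAgent base class expects
}
agent = BCAgent(params)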
Example #2
def main(arguments):
    # load the features of the dataset
    features = datasets.load_breast_cancer().data

    # standardize the features
    features = StandardScaler().fit_transform(features)

    # get the number of features
    num_features = features.shape[1]

    # load the labels for the features
    labels = datasets.load_breast_cancer().target

    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.30, stratify=labels)

    model = MLP(alpha=LEARNING_RATE,
                batch_size=BATCH_SIZE,
                node_size=NUM_NODES,
                num_classes=NUM_CLASSES,
                num_features=num_features)

    model.train(num_epochs=arguments.num_epochs,
                log_path=arguments.log_path,
                train_data=[train_features, train_labels],
                train_size=train_features.shape[0],
                test_data=[test_features, test_labels],
                test_size=test_features.shape[0],
                result_path=arguments.result_path)
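The script pulls num_epochs, log_path, and result_path from its arguments object; a plausible argparse wrapper (an assumption, not shown in the snippet) would be:

import argparse

def parse_args():
    # Hypothetical parser matching the attribute names used in main().
    parser = argparse.ArgumentParser(description='MLP classifier on the breast cancer dataset')
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--log_path', type=str, default='./logs')
    parser.add_argument('--result_path', type=str, default='./results')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())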
Example #3
def main(args):

    features, targets = generate_synthetic_data(args.model_type,
                                                args.num_samples)

    # split train/test sets
    x_train, x_val, y_train, y_val = train_test_split(features,
                                                      targets,
                                                      test_size=0.2)

    db_train = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).batch(args.batch_size_train)
    db_val = tf.data.Dataset.from_tensor_slices(
        (x_val, y_val)).batch(args.batch_size_eval)

    if args.model_type == 'MLP':
        model = MLP(num_inputs=Constants._MLP_NUM_FEATURES,
                    num_layers=Constants._MLP_NUM_LAYERS,
                    num_dims=Constants._MLP_NUM_DIMS,
                    num_outputs=Constants._NUM_TARGETS,
                    dropout_rate=args.dropout)
    elif args.model_type == 'TCN':
        model = TCN(nb_filters=Constants._TCN_NUM_FILTERS,
                    kernel_size=Constants._TCN_KERNEL_SIZE,
                    nb_stacks=Constants._TCN_NUM_STACK,
                    dilations=Constants._TCN_DIALATIONS,
                    padding=Constants._TCN_PADDING,
                    dropout_rate=args.dropout)

    criteon = keras.losses.MeanSquaredError()
    optimizer = keras.optimizers.Adam(learning_rate=args.lr)

    for epoch in range(args.max_epoch):
        for step, (x, y) in enumerate(db_train):
            with tf.GradientTape() as tape:
                logits = model(x)
                loss = criteon(y, logits)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            if step % 100 == 0:
                print('Epoch: {}, Step: {}/{}, Loss: {}'.format(
                    epoch, step, int(x_train.shape[0] / args.batch_size_train),
                    loss))

        # Perform inference and measure the speed every epoch
        start_time = time.time()
        for _, (x, _) in enumerate(db_val):
            _ = model.predict(x)
        end_time = time.time()

        print("Inference speed: {} samples/s\n".format(
            x_val.shape[0] / (end_time - start_time)))
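main(args) relies on several command-line options; a sketch of an argparse block that would supply them (option names are taken from the snippet, defaults are assumptions):

import argparse

def parse_args():
    # Defaults here are illustrative assumptions.
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_type', choices=['MLP', 'TCN'], default='MLP')
    parser.add_argument('--num_samples', type=int, default=10000)
    parser.add_argument('--batch_size_train', type=int, default=64)
    parser.add_argument('--batch_size_eval', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.1)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--max_epoch', type=int, default=10)
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())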
Example #4
 def get_model(format, optimised=True) -> AbstractModel:
     if format == 'LogisticRegression':
         return LogisticRegressionModel(optimised)
     if format == 'RandomForest':
         return RandomForestModel(optimised)
     if format == 'NaiveBayes':
         return NaiveBayes(optimised)
     if format == 'GradientBoosting':
         return GradientBoosting(optimised)
     if format == 'SVM':
         return SVM(optimised)
     if format == 'OneClassSVM':
         return OneClassSVMModel(optimised)
     if format == 'DecisionTree':
         return DecisionTree(optimised)
     if format == 'AdaBoost':
         return AdaBoost(optimised)
     if format == 'GaussianProcess':
         return GaussianProcess(optimised)
     if format == 'MLP':
         return MLP(optimised)
     if format == 'KNeighbors':
         return KNeighbors(optimised)
     if format == 'QuadraticDiscriminant':
         return QuadraticDiscriminant(optimised)
     if format == 'Dummy':
         return Dummy(optimised)
     else:
         raise ValueError(format)
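Usage is a single call, e.g. model = get_model('RandomForest', optimised=False). The same dispatch could also be collapsed into a dictionary lookup, which keeps the mapping in one place; a sketch (the _MODELS table is not part of the original code):

_MODELS = {
    'LogisticRegression': LogisticRegressionModel,
    'RandomForest': RandomForestModel,
    'NaiveBayes': NaiveBayes,
    'GradientBoosting': GradientBoosting,
    'SVM': SVM,
    'OneClassSVM': OneClassSVMModel,
    'DecisionTree': DecisionTree,
    'AdaBoost': AdaBoost,
    'GaussianProcess': GaussianProcess,
    'MLP': MLP,
    'KNeighbors': KNeighbors,
    'QuadraticDiscriminant': QuadraticDiscriminant,
    'Dummy': Dummy,
}

def get_model(format, optimised=True) -> AbstractModel:
    try:
        return _MODELS[format](optimised)
    except KeyError:
        raise ValueError(format)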
Example #5
 def define_model(self, model_name, ps):
     if model_name == 'catboost':
         return GBDTCatBoost(self.task, **ps)
     elif model_name == 'lightgbm':
         return GBDTLGBM(self.task, **ps)
     elif model_name == 'mlp':
         return MLP(self.task, **ps)
     elif model_name == 'gnn':
         return GNN(self.task, **ps)
     elif model_name == 'resgnn':
         gbdt = GBDTCatBoost(self.task)
         gbdt.fit(self.X,
                  self.y,
                  self.train_mask,
                  self.val_mask,
                  self.test_mask,
                  cat_features=self.cat_features,
                  num_epochs=1000,
                  patience=100,
                  plot=False,
                  verbose=False,
                  loss_fn=None,
                  metric_name='loss'
                  if self.task == 'regression' else 'accuracy')
         return GNN(task=self.task,
                    gbdt_predictions=gbdt.model.predict(self.X),
                    **ps)
     elif model_name == 'bgnn':
         return BGNN(self.task, **ps)
Example #6
def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.metadata = exp_info.metadata

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    batch_size = args.batch_size
    args.batch_size = 1
    feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(
        args, save=True)
    label_num = exp_info.get_label_num(args)

    hidden_size = 256
    hidden_layers = 2

    args.resume = os.path.join(
        paths.checkpoint_root,
        'detection_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate))
    if args.model == 'lstm':
        detection_model = BiLSTM(feature_size, hidden_size, hidden_layers,
                                 label_num)
    else:
        detection_model = MLP(feature_size, hidden_size, label_num)
    detection_model = torch.nn.DataParallel(detection_model)
    logutils.load_checkpoint(args, detection_model)

    args.resume = os.path.join(
        paths.checkpoint_root,
        'frame_prediction_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate, args.using_pred_duration))
    if args.model == 'lstm':
        prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers,
                                     label_num)
    else:
        prediction_model = MLP(feature_size, hidden_size, label_num)
    prediction_model = torch.nn.DataParallel(prediction_model)
    logutils.load_checkpoint(args, prediction_model)

    validate(test_loader, detection_model, prediction_model, args=args)
Example #7
def get_model(model):
    """
    Get Model instance
    """
    assert model in ['CNN', 'MLP']

    if model == 'CNN': return Char_CNN(config, fc_layers, filter_sizes)
    else: return MLP(config, fc_layers)
Example #8
def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.resume = os.path.join(
        paths.checkpoint_root,
        'frame_prediction_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate, args.pred_duration))
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    feature_size, train_loader, val_loader, test_loader, _ = exp_info.get_dataset(
        args)
    label_num = exp_info.get_label_num(args)

    criterion = torch.nn.CrossEntropyLoss()
    hidden_size = 256
    hidden_layers = 2
    if args.model == 'lstm':
        model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num)
    else:
        model = MLP(feature_size, hidden_size, label_num)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = lr_scheduler.StepLR(optimizer, args.lr_freq, args.lr_decay)
    model = torch.nn.DataParallel(model)
    if args.cuda:
        criterion = criterion.cuda()
        model = model.cuda()
    if args.resume:
        utils.load_checkpoint(args, model, optimizer, scheduler)

    best_prec = 0.0

    if args.eval:
        validate(test_loader, model, args, test=True)
    else:
        for epoch in tqdm(range(args.start_epoch, args.epochs),
                          desc='Epochs Loop'):
            train(train_loader, model, criterion, optimizer, epoch, args)
            prec = validate(val_loader, model, args)
            scheduler.step()
            best_prec = max(prec, best_prec)
            is_best = (best_prec == prec)
            tqdm.write('Best precision: {:.03f}'.format(best_prec))
            if (epoch + 1) % args.save_interval == 0:
                utils.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_prec': best_prec,
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict()
                    }, is_best, args)
Example #9
    def __init__(self, params):
        super(VPGAgent, self).__init__(params)
        self.H = self.params['pg']['H']
        self.lam = self.params['pg']['lam']

        # Initialize policy network
        pol_params = self.params['pg']['pol_params']
        pol_params['input_size'] = self.N
        pol_params['output_size'] = self.M
        if 'final_activation' not in pol_params:
            pol_params['final_activation'] = torch.tanh

        self.pol = MLP(pol_params)

        # Std's are not dependent on state
        init_log_std = -0.8 * torch.ones(self.M)  # ~0.45
        self.log_std = torch.nn.Parameter(init_log_std, requires_grad=True)

        # Create policy optimizer
        ppar = self.params['pg']['pol_optim']
        self.pol_params = list(self.pol.parameters()) + [self.log_std]
        self.pol_optim = torch.optim.Adam(self.pol_params,
                                          lr=ppar['lr'],
                                          weight_decay=ppar['reg'])

        # Create value function and optimizer
        val_params = self.params['pg']['val_params']
        val_params['input_size'] = self.N
        val_params['output_size'] = 1

        self.val = MLP(val_params)

        vpar = self.params['pg']['val_optim']
        self.val_optim = torch.optim.Adam(self.val.parameters(),
                                          lr=vpar['lr'],
                                          weight_decay=vpar['reg'])

        # Logging
        self.hist['ent'] = np.zeros(self.T)
Example #10
def main(arguments):
    # load the features of the dataset
    features = datasets.load_breast_cancer().data

    # standardize the features
    features = StandardScaler().fit_transform(features)

    # get the number of features
    num_features = features.shape[1]

    # load the labels for the features
    labels = datasets.load_breast_cancer().target

    train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.30,
                                                                                stratify=labels)

    model = MLP(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, node_size=NUM_NODES, num_classes=NUM_CLASSES,
                num_features=num_features)

    model.train(num_epochs=arguments.num_epochs, log_path=arguments.log_path, train_data=[train_features, train_labels],
                train_size=train_features.shape[0], test_data=[test_features, test_labels],
                test_size=test_features.shape[0], result_path=arguments.result_path)
Example #11
    def __init__(self, cnn_args, mlp_args):
        super(CNN, self).__init__()

        # embedding layer
        self.embedding_dim = cnn_args['emb_dim']
        self.embedding = nn.Embedding(cnn_args['vocab_size'],
                                      self.embedding_dim)
        # initialize with pretrained embeddings
        print("Initializing with pretrained embeddings")
        self.embedding.weight.data.copy_(cnn_args['pretrained_emb'])

        # Dropout definition
        self.dropout = nn.Dropout(0.25)

        # CNN parameters definition
        # Kernel sizes
        self.kernel_1 = 2
        self.kernel_2 = 3
        self.kernel_3 = 4
        self.kernel_4 = 5

        # Num kernels for each convolution size
        self.seq_len = cnn_args['text_len']
        # Output size for each convolution
        self.out_channels = cnn_args['num_kernel']
        # Number of strides for each convolution
        self.stride = cnn_args['stride']

        # Convolution layers definition
        self.conv_1 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_1,
                                self.stride)
        self.conv_2 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_2,
                                self.stride)
        self.conv_3 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_3,
                                self.stride)
        self.conv_4 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_4,
                                self.stride)

        # Max pooling layers definition
        self.pool_1 = nn.MaxPool1d(self.kernel_1, self.stride)
        self.pool_2 = nn.MaxPool1d(self.kernel_2, self.stride)
        self.pool_3 = nn.MaxPool1d(self.kernel_3, self.stride)
        self.pool_4 = nn.MaxPool1d(self.kernel_4, self.stride)

        # MLP classifier
        mlp_input_size = self.in_features_fc()
        #print("mlp_input_size:", mlp_input_size)
        self.mlp = MLP(input_size=mlp_input_size,
                       hidden_size=mlp_args['hidden_size'],
                       num_classes=mlp_args['num_classes'])
Example #12
def get_model(args):
    if args.model == "mlp":
        return MLP(args.input_size * 2, args.hidden_size, args.dropout,
                   args.output_size)
    elif args.model == "attention":
        return Attention(args.input_size * 2,
                         args.hidden_size[0],
                         args.layers,
                         args.dropout,
                         args.output_size,
                         gpu=args.gpu)
    elif args.model == 'linear':
        return Linear(args.input_size * 2, args.output_size)
    else:
        assert False
Example #13
 def get_model(config, args, seq_indexer, label_indexer):
     if config['type'] == 'RNN':
         return TextRNNAttn(embedding_alphabet=seq_indexer,
                            gpu=args.gpu,
                            feat_num=label_indexer.__len__(),
                            **config['model'])
     elif config['type'] == 'CNN':
         return TextCNN(embedding_alphabet=seq_indexer,
                        gpu=args.gpu,
                        feat_num=label_indexer.__len__(),
                        **config['model'])
     elif config['type'] == 'MLP':
         return MLP(embedding_alphabet=seq_indexer,
                    gpu=args.gpu,
                    feat_num=label_indexer.__len__(),
                    **config['model'])
     else:
         raise RuntimeError('no model')
Example #14
 def __init__(self, kernelSize=11, featureSize=1024):
     super().__init__()
     assert kernelSize % 2 == 1, "kernel should be odd"
     self.conv1 = nn.Conv1d(featureSize,
                            64,
                            kernelSize,
                            padding=kernelSize // 2)
     self.maxpool1 = nn.MaxPool1d(2)
     self.conv2 = nn.Conv1d(64, 96, kernelSize, padding=kernelSize // 2)
     self.maxpool2 = nn.MaxPool1d(2)
     self.upsample1 = nn.Upsample(scale_factor=2, mode="nearest")
     self.conv3 = nn.Conv1d(96, 64, kernelSize, padding=kernelSize // 2)
     self.upsample2 = nn.Upsample(scale_factor=2, mode="nearest")
     self.conv4 = nn.Conv1d(64,
                            featureSize,
                            kernelSize,
                            padding=kernelSize // 2)
     self.classifier = MLP(featureSize)
     self.featureSize = featureSize
Example #15
    def __init__(self, lstm_args, mlp_args):
        super(LSTM, self).__init__()
        # setting hyperparams
        self.hidden_dim = lstm_args['hidden_size']
        self.dropout_prob = lstm_args['dropout']
        self.use_gru = lstm_args['gru']
        self.embedding_dim = lstm_args['emb_dim']

        # embedding layer
        self.embedding = nn.Embedding(lstm_args['vocab_size'],
                                      self.embedding_dim)
        # initialize with pretrained word emb if provided
        if 'pretrained_emb' in lstm_args:
            print("Initializing with pretrained embeddings")
            self.embedding.weight.data.copy_(lstm_args['pretrained_emb'])

        # biLSTM layer + dropout
        self.lstm = nn.LSTM(input_size=self.embedding_dim,
                            hidden_size=self.hidden_dim,
                            num_layers=2,
                            batch_first=True,
                            bidirectional=True)
        self.drop = nn.Dropout(p=self.dropout_prob)

        # GRU layer
        self.gru = nn.GRU(input_size=self.embedding_dim,
                          hidden_size=self.hidden_dim,
                          num_layers=2,
                          batch_first=True,
                          bidirectional=True,
                          dropout=self.dropout_prob)

        # fully-connected linear layer
        mlp_input_size = 2 * self.hidden_dim
        self.mlp = MLP(input_size=mlp_input_size,
                       hidden_size=mlp_args['hidden_size'],
                       num_classes=mlp_args['num_classes'])
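The constructor above only builds the layers; the forward pass is not shown. A minimal sketch of one plausible forward (assumptions: batch-first token indices go in, and the last time step of the bidirectional recurrent output, 2 * hidden_dim wide, feeds the MLP):

    def forward(self, x):
        # Hypothetical forward, not from the source. x: (batch, seq_len) token ids.
        emb = self.embedding(x)              # (batch, seq_len, emb_dim)
        if self.use_gru:
            out, _ = self.gru(emb)           # (batch, seq_len, 2 * hidden_dim)
        else:
            out, _ = self.lstm(emb)
        feat = self.drop(out[:, -1, :])      # last time step, both directions
        return self.mlp(feat)                # (batch, num_classes)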
Example #16
    def __init__(self, params):
        self.params = params
        self.kappa = self.params['kappa']

        self.dtype = self.params['dtype']
        self.device = self.params['device']

        self.models = []
        self.priors = []
        self.optims = []

        for i in range(self.params['ens_size']):
            model = MLP(self.params['model_params']).to(device=self.device)
            self.models.append(model)
            self.optims.append(
                torch.optim.Adam(model.parameters(),
                                 lr=self.params['lr'],
                                 weight_decay=self.params['reg']))

            prior = MLP(self.params['model_params']).to(device=self.device)
            prior.eval()
            self.priors.append(prior)
Example #17
class BCAgent(POLOAgent):
    """
    An agent extending POLO that uses behavior cloning on the planner's
    predicted actions as a prior for MPC.
    """
    def __init__(self, params):
        super(BCAgent, self).__init__(params)

        # Initialize policy network
        pol_params = self.params['p-bc']['pol_params']
        pol_params['input_size'] = self.N
        pol_params['output_size'] = self.M
        if 'final_activation' not in pol_params:
            pol_params['final_activation'] = torch.tanh

        self.pol = MLP(pol_params)

        # Create policy optimizer
        ppar = self.params['p-bc']['pol_optim']
        self.pol_optim = torch.optim.Adam(self.pol.parameters(),
                                          lr=ppar['lr'],
                                          weight_decay=ppar['reg'])

        # Use a replay buffer that will save planner actions
        self.pol_buf = ReplayBuffer(self.N, self.M,
                                    self.params['p-bc']['buf_size'])

        # Logging (store cum_rew, cum_emp_rew)
        self.hist['pols'] = np.zeros((self.T, 2))

        self.has_pol = True

        self.pol_cache = ()

    def get_action(self):
        """
        BCAgent generates a planned trajectory using the behavior-cloned policy
        and then optimizes it via MPC.
        """
        self.pol.eval()

        # Run a rollout using the policy starting from the current state
        infos = self.get_traj_info()

        self.hist['pols'][self.time] = infos[3:5]
        self.pol_cache = (infos[0], infos[2])

        self.prior_actions = infos[1]

        # Generate trajectory via MPC with the prior actions as a prior
        action = super(BCAgent, self).get_action(prior=self.prior_actions)

        # Add final planning trajectory to BC buffer
        fin_states, fin_rews = self.cache[2], self.cache[3]
        fin_states = np.concatenate(([self.prev_obs], fin_states[1:]))
        pb_pct = self.params['p-bc']['pb_pct']
        pb_len = int(pb_pct * fin_states.shape[0])
        for t in range(pb_len):
            self.pol_buf.update(fin_states[t], fin_states[t + 1], fin_rews[t],
                                self.planned_actions[t], False)

        return action

    def do_updates(self):
        """
        Learn from the saved buffer of planned actions.
        """
        super(BCAgent, self).do_updates()

        if self.time % self.params['p-bc']['update_freq'] == 0:
            self.update_pol()

    def update_pol(self):
        """
        Update the policy via BC on the planner actions.
        """
        self.pol.train()

        params = self.params['p-bc']

        # Generate batches for training
        size = min(self.pol_buf.size, self.pol_buf.total_in)
        num_inds = params['batch_size'] * params['grad_steps']
        inds = np.random.randint(0, size, size=num_inds)

        states = self.pol_buf.buffer['s'][inds]
        acts = self.pol_buf.buffer['a'][inds]

        states = torch.tensor(states, dtype=self.dtype)
        actions = torch.tensor(acts, dtype=self.dtype)

        for i in range(params['grad_steps']):
            bi, ei = i * params['batch_size'], (i + 1) * params['batch_size']

            # Train based on L2 distance between actions and predictions
            preds = self.pol.forward(states[bi:ei])
            preds = torch.squeeze(preds, dim=-1)
            targets = torch.squeeze(actions[bi:ei], dim=-1)

            loss = torch.nn.functional.mse_loss(preds, targets)

            self.pol_optim.zero_grad()
            loss.backward()
            self.pol_optim.step()

    def get_traj_info(self):
        """
        Run the policy for a full trajectory and return details about the
        trajectory.
        """
        env_state = self.env.sim.get_state() if self.mujoco else None

        infos = traj.eval_traj(copy.deepcopy(self.env),
                               env_state,
                               self.prev_obs,
                               mujoco=self.mujoco,
                               perturb=self.perturb,
                               H=self.H,
                               gamma=self.gamma,
                               act_mode='deter',
                               pt=(self.pol, 0),
                               terminal=self.val_ens,
                               tvel=self.tvel)

        return infos

    def print_logs(self):
        """
        BC-specific logging information.
        """
        bi, ei = super(BCAgent, self).print_logs()

        self.print('BC metrics', mode='head')

        self.print('policy traj rew', self.hist['pols'][self.time - 1][0])
        self.print('policy traj emp rew', self.hist['pols'][self.time - 1][1])

        return bi, ei

    def test_policy(self):
        """
        Run the BC action selection mechanism.
        """
        env = copy.deepcopy(self.env)
        obs = env.reset()

        if self.tvel is not None:
            env.set_target_vel(self.tvel)
            obs = env._get_obs()

        env_state = env.sim.get_state() if self.mujoco else None
        infos = traj.eval_traj(env,
                               env_state,
                               obs,
                               mujoco=self.mujoco,
                               perturb=self.perturb,
                               H=self.eval_len,
                               gamma=1,
                               act_mode='deter',
                               pt=(self.pol, 0),
                               tvel=self.tvel)

        self.hist['pol_test'][self.time] = infos[3]
Example #18
        'dev'], datasets['test']
    seq_indexer = SeqIndexerBaseEmbeddings("glove", args.embedding_dir,
                                           args.embedding_dim, ' ')
    seq_indexer.load_embeddings_from_file()

    label_indexer = SeqIndexerBase("label", False, False)
    label_indexer.add_instance(dataset.train_label)

    if args.load is not None:
        model = torch.load(args.load)
        if args.gpu >= 0:
            model.cuda(device=args.gpu)
    else:
        if args.model == 'MLP':
            model = MLP(embedding_indexer=seq_indexer,
                        gpu=args.gpu,
                        feat_num=label_indexer.__len__(),
                        dropout=args.dropout_rate)
        elif args.model == 'CNN':
            model = TextCNN(embedding_indexer=seq_indexer,
                            gpu=args.gpu,
                            feat_num=label_indexer.__len__(),
                            dropout=args.dropout_rate,
                            kernel_size=[2, 3, 5])

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=0,
                                 amsgrad=False)
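The snippet ends after building the loss and optimizer; a minimal sketch of the training loop they would typically drive (the loader and epoch count are assumptions, not shown in the source):

# Hypothetical loop; a train_loader yielding (text, label) tensor batches is assumed.
for epoch in range(args.epochs):
    model.train()
    for text, label in train_loader:
        logits = model(text)
        loss = criterion(logits, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()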
Example #19
    parser.add_argument(
        '-v',
        '--validation',
        dest='val',
        type=float,
        default=10.0,
        help='Percent of the data that is used as validation (0-100)')

    return parser.parse_args()


if __name__ == '__main__':
    args = get_args()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    net = MLP(1, 3)

    if args.load:
        net.load_state_dict(torch.load(args.load, map_location=device))

    net.to(device=device)

    try:
        train_net(net=net,
                  epochs=args.epochs,
                  batch_size=args.batchsize,
                  lr=args.lr,
                  device=device,
                  val_percent=args.val / 100)
    except KeyboardInterrupt:
        torch.save(net.state_dict(), 'INTERRUPTED.pth')
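Since the KeyboardInterrupt handler saves the weights to INTERRUPTED.pth, a later run can resume from them with the same load_state_dict pattern used for the --load branch above:

# Resume from the interrupt checkpoint (same pattern as the args.load branch).
net = MLP(1, 3)
net.load_state_dict(torch.load('INTERRUPTED.pth', map_location=device))
net.to(device=device)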
Example #20
def main():
    global opt
    opt = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    # Set up logging
    if opt.savepath == None:
        path = os.path.join('save', datetime.datetime.now().strftime("%d-%H-%M-%S"))
    else:
        path = opt.savepath
    os.makedirs(path, exist_ok=True)
    logger = utils.Logger(path)

    # Keep track of accuracies 
    val_accuracies = []
    test_accuracies = []

    # Seed for cross-val split
    seed = random.randint(0,10000) if opt.seed < 0 else opt.seed
    logger.log('SEED: {}'.format(seed), stdout=False)

    # Load data
    if opt.preloaded_splits.lower() == 'none':
        start = time.time()
        data, label = get_data(opt.data, opt.label)
        logger.log('Data loaded in {:.1f}s\n'.format(time.time() - start))
    else:
        data, label = np.zeros(5), np.zeros(5) # dummy labels for iterating over
        logger.log('Using preloaded splits\n')

    # Create cross-validation splits
    kf = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)

    # Cross validate 
    for i, (train_index, test_index) in enumerate(kf.split(data, label)):

        # Log split
        logger.log('------------- SPLIT {} --------------\n'.format(i+1))

        # Train / test split (ignored if opt.preloaded_splits is not 'none')
        X, X_test = data[train_index], data[test_index]
        y, y_test = label[train_index], label[test_index]

        # Perform PCA and generate dataloader or load from saved file
        start = time.time()
        apply_pca_transform = (opt.arch not in ['exp'])
        train_loader, val_loader, test_loader, pca_components, input_size, num_classes, pca_matrix = \
            get_dataloader(opt.preloaded_splits, X, X_test, y, y_test, batch_size=opt.b, val_fraction=opt.val_fraction, 
                           pca_components=opt.pca_components, apply_pca_transform=apply_pca_transform, 
                           imputation_dim=opt.impute, split=i, save_dataset=(not opt.no_save_dataset))
        logger.log('Dataloader loaded in {:.1f}s\n'.format(time.time() - start))

        # Model 
        arch = opt.arch.lower()
        assert arch in ['logreg', 'mlp', 'exp']
        if arch == 'logreg': 
            model = LogisticRegression(input_size, opt.pca_components, num_classes)
        elif arch == 'mlp':
            model = MLP(input_size, opt.hidden_size, num_classes, opt.dp) 
        elif arch == 'exp': 
            model = ExperimentalModel(input_size, opt.pca_components, opt.hidden_size, num_classes, opt.dp) 
        
        # Pretrained / Initialization
        if opt.model is not None and os.path.isfile(opt.model):
            # Pretrained model
            model.load_state_dict(torch.load(opt.model))
            logger.log('Loaded pretrained model.', stdout=(i==0))
        else:
            # Initialize model uniformly
            for p in model.parameters():
                p.data.uniform_(-0.1, 0.1)
            logger.log('Initialized model from scratch.', stdout=(i==0))
        model = model.cuda() if use_gpu else model
        print(model)

        # Initialize first layer with PCA and fix PCA weights if model requires
        if opt.arch in ['exp']:
            model.first_layer.weight.data.copy_(pca_matrix)
            logger.log('Initialized first layer as PCA', stdout=(i==0))
            if not opt.finetune_pca:
                model.first_layer.weight.requires_grad = False
                logger.log('Fixed PCA weights', stdout=(i==0))

        # Loss function and optimizer
        criterion = nn.CrossEntropyLoss(size_average=False)
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr, weight_decay=opt.wd) 
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=opt.lr_decay_patience, 
                        factor=opt.lr_decay_factor, verbose=True, cooldown=opt.lr_decay_cooldown)

        # Log parameters
        logger.log('COMMAND LINE ARGS: ' + ' '.join(sys.argv), stdout=False)
        logger.log('ARGS: {}\nOPTIMIZER: {}\nLEARNING RATE: {}\nSCHEDULER: {}\nMODEL: {}\n'.format(
            opt, optimizer, opt.lr, vars(scheduler), model), stdout=False)

        # If specified, only evaluate model
        if opt.evaluate:
            assert opt.model != None, 'no pretrained model to evaluate'
            total_correct, total, _ = validate(model, val_loader, criterion)
            logger.log('Accuracy: {:.3f} \t Total correct: {} \t Total: {}'.format(
                total_correct/total, total_correct, total))
            return 

        # Train model 
        start_time = time.time()
        best_acc = train(model, train_loader, val_loader, optimizer, criterion, logger, 
            num_epochs=opt.epochs, print_freq=opt.print_freq, model_id=i)
        logger.log('Best train accuracy: {:.2f}% \t Finished split {} in {:.2f}s\n'.format(
            100 * best_acc, i+1, time.time() - start_time))
        val_accuracies.append(best_acc)

        # Best evaluation on validation set
        best_model_path = os.path.join(path, 'model_{}.pth'.format(i))
        model.load_state_dict(torch.load(best_model_path)) # load best model
        total_correct, total, _ = validate(model, val_loader, criterion) # check val set
        logger.log('Val Accuracy: {:.3f} \t Total correct: {} \t Total: {}'.format(
            total_correct/total, total_correct, total))

        # Optionally also evaluate on test set
        if opt.test:
            total_correct, total, visualize = validate(model, test_loader, criterion, visualize=True) # run test set
            logger.log('Test Accuracy: {:.3f} \t Total correct: {} \t Total: {}\n'.format(
                total_correct/total, total_correct, total))
            logger.save_model(visualize, 'visualize_{}.pth'.format(i))
            test_accuracies.append(total_correct/total)
    
    # Log after training
    logger.log('Val Accuracies: {}'.format(val_accuracies))
    logger.log('Test Accuracies: {}'.format(test_accuracies))
    logger.log('Run id: {} \t Test Accuracies: {}'.format(opt.id, test_accuracies))
Example #21
    def __init__(self, cfg):
        trainLoader, valLoader = get_dataloaders(
            args.trainNormalFolder, args.trainNormalAnnotations,
            args.trainAbnormalFolder, args.trainAbnormalAnnotations,
            args.trainNormalTopK, args.valNormalFolder,
            args.valNormalAnnotations, args.valAbnormalFolder,
            args.valAbnormalAnnotations, args.valNormalTopK, args.batchSize,
            args.numWorkers, args.model, args.windowSize, args.subWindows,
            args.featureSize, args.maxVideoSize)

        self.modelType = args.model
        self.trainLoader = trainLoader
        self.valLoader = valLoader
        self.expFolder = args.expFolder
        self.maskValue = args.maskValue
        self.stepCounter = 0
        self.bestAUC = 0
        self.noNormalSegmentation = args.noNormalSegmentation
        self.lossType = args.loss

        if args.model == "mlp":
            self.model = MLP(featureSize=args.featureSize)
        elif args.model == "tcn":
            self.model = EDTCN(featureSize=args.featureSize,
                               kernelSize=args.kernelSize)
        elif args.model == "mstcn":
            self.model = MultiStageModel(num_stages=args.numStages,
                                         num_layers=args.numLayers,
                                         num_f_maps=args.numFeatureMaps,
                                         dim=args.featureSize,
                                         ssRepeat=args.firstStageRepeat)
            print("[Info] MS-TCN W{}-S{}-L{} have been created".format(
                args.windowSize, args.numStages, args.numLayers))
        # elif args.model == "mcbtcn":
        #     self.model = MultiClassBinaryTCN(numClassStages=args.numClassStages, numBinaryStages=args.numBinaryStages,
        #                                      num_layers=args.numLayers, num_f_maps=args.numFeatureMaps,
        #                                      dim=args.featureSize, numClasses=16)

        self.model = self.model.float()

        # if torch.cuda.is_available():
        #     self.model = self.model.cuda()

        if args.optimizer == "adam":
            self.optimizer = torch.optim.Adam(self.model.parameters(),
                                              lr=args.learningRate,
                                              betas=(0.5, 0.9),
                                              eps=1e-08,
                                              weight_decay=0,
                                              amsgrad=False)
        self.scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer,
            step_size=args.schedulerStepSize,
            gamma=args.schedulerGamma)
        if args.modelPath:
            self.loadCheckpoint(args.modelPath)
            print("[Info] Model have been loaded at {}".format(args.modelPath))

        if torch.cuda.is_available():
            self.model = self.model.cuda()

        self.model = self.model.float()

        self.ceLoss = torch.nn.CrossEntropyLoss(ignore_index=-1)
        self.ASLoss = TemporalHardPairLoss(max_violation=True,
                                           margin=args.adLossMargin,
                                           measure="output")
        self.mseLoss = torch.nn.MSELoss()
        self.lossLambda = args.adLossLambda

        self.writer = None
        if not args.test:
            self.writer = SummaryWriter(log_dir=args.expFolder)
Example #22
def choose_model(conf, G, features, labels, byte_idx_train, labels_one_hot):
    if conf['model_name'] == 'GCN':
        model = GCN(g=G,
                    in_feats=features.shape[1],
                    n_hidden=conf['hidden'],
                    n_classes=labels.max().item() + 1,
                    n_layers=1,
                    activation=F.relu,
                    dropout=conf['dropout']).to(conf['device'])
    elif conf['model_name'] == 'GAT':
        num_heads = 8
        num_layers = 1
        num_out_heads = 1
        heads = ([num_heads] * num_layers) + [num_out_heads]
        model = GAT(
            g=G,
            num_layers=num_layers,
            in_dim=G.ndata['feat'].shape[1],
            num_hidden=8,
            num_classes=labels.max().item() + 1,
            heads=heads,
            activation=F.relu,
            feat_drop=0.6,
            attn_drop=0.6,
            negative_slope=0.2,  # negative slope of leaky relu
            residual=False).to(conf['device'])
    elif conf['model_name'] == 'PLP':
        model = PLP(g=G,
                    num_layers=conf['num_layers'],
                    in_dim=G.ndata['feat'].shape[1],
                    emb_dim=conf['emb_dim'],
                    num_classes=labels.max().item() + 1,
                    activation=F.relu,
                    feat_drop=conf['feat_drop'],
                    attn_drop=conf['attn_drop'],
                    residual=False,
                    byte_idx_train=byte_idx_train,
                    labels_one_hot=labels_one_hot,
                    ptype=conf['ptype'],
                    mlp_layers=conf['mlp_layers']).to(conf['device'])
    elif conf['model_name'] == 'GraphSAGE':
        model = GraphSAGE(in_feats=G.ndata['feat'].shape[1],
                          n_hidden=16,
                          n_classes=labels.max().item() + 1,
                          n_layers=1,
                          activation=F.relu,
                          dropout=0.5,
                          aggregator_type=conf['agg_type']).to(conf['device'])
    elif conf['model_name'] == 'APPNP':
        model = APPNP(g=G,
                      in_feats=G.ndata['feat'].shape[1],
                      hiddens=[64],
                      n_classes=labels.max().item() + 1,
                      activation=F.relu,
                      feat_drop=0.5,
                      edge_drop=0.5,
                      alpha=0.1,
                      k=10).to(conf['device'])
    elif conf['model_name'] == 'LogReg':
        model = MLP(num_layers=1,
                    input_dim=G.ndata['feat'].shape[1],
                    hidden_dim=None,
                    output_dim=labels.max().item() + 1,
                    dropout=0).to(conf['device'])
    elif conf['model_name'] == 'MLP':
        model = MLP(num_layers=2,
                    input_dim=G.ndata['feat'].shape[1],
                    hidden_dim=conf['hidden'],
                    output_dim=labels.max().item() + 1,
                    dropout=conf['dropout']).to(conf['device'])
    else:
        raise ValueError(f"Undefined model: {conf['model_name']}")
    return model
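A sketch of a conf dictionary that would select the plain MLP branch (the key names appear in the snippet; the values are illustrative assumptions):

conf = {
    'model_name': 'MLP',
    'hidden': 64,      # illustrative
    'dropout': 0.5,    # illustrative
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
}
model = choose_model(conf, G, features, labels, byte_idx_train, labels_one_hot)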
Example #23
 def get_model(self, model_cfg):
     model = MLP(featureSize=model_cfg.feature_size)
     return model
Example #24
def run_episode(strategies, policy, beta, device, num_worker):
    states, actions = [], []
    # all strategies use same initial training data and model weights
    reinit_seed(prop.RANDOM_SEED)
    if prop.MODEL == "MLP":
        model = MLP().apply(weights_init).to(device)
    if prop.MODEL == "CNN":
        model = CNN().apply(weights_init).to(device)
    if prop.MODEL == "RESNET18":
        model = models.resnet.ResNet18().to(device)
    init_weights = deepcopy(model.state_dict())

    # re-init seed was here before
    use_learner = True if np.random.rand(1) > beta else False
    if use_learner:
        policy = policy.to(
            device)  # load policy only when learner is used for states

    dataset_pool, valid_dataset, test_dataset = get_policy_training_splits()

    train_dataset, pool_dataset = stratified_split_dataset(
        dataset_pool, prop.INIT_SIZE, prop.NUM_CLASSES)

    # Initial sampling
    if prop.SINGLE_HEAD:
        my_strategies = []
        for StrategyClass in strategies:
            my_strategies.append(
                StrategyClass(dataset_pool, valid_dataset, test_dataset))
    if prop.CLUSTER_EXPERT_HEAD:
        UncertaintyStrategieClasses, DiversityStrategieClasses = strategies
        un_strategies = []
        di_strategies = []
        for StrategyClass in UncertaintyStrategieClasses:
            un_strategies.append(
                StrategyClass(dataset_pool, valid_dataset, test_dataset))
        for StrategyClass in DiversityStrategieClasses:
            di_strategies.append(
                StrategyClass(dataset_pool, valid_dataset, test_dataset))
    if prop.CLUSTERING_AUX_LOSS_HEAD:
        my_strategies = []
        for StrategyClass in strategies:
            my_strategies.append(
                StrategyClass(dataset_pool, valid_dataset, test_dataset))

    init_acc = train_validate_model(model, device, train_dataset,
                                    valid_dataset, test_dataset)

    t = trange(1,
               prop.NUM_ACQS + 1,
               desc="Aquisitions (size {})".format(prop.ACQ_SIZE),
               leave=True)
    for acq_num in t:
        subset_ind = np.random.choice(a=len(pool_dataset),
                                      size=prop.K,
                                      replace=False)
        pool_subset = make_tensordataset(pool_dataset, subset_ind)
        if prop.CLUSTER_EXPERT_HEAD:
            un_sel_ind = expert(acq_num, model, init_weights, un_strategies,
                                train_dataset, pool_subset, valid_dataset,
                                test_dataset, device)
            di_sel_ind = expert(acq_num, model, init_weights, di_strategies,
                                train_dataset, pool_subset, valid_dataset,
                                test_dataset, device)
            state, action = get_state_action(model,
                                             train_dataset,
                                             pool_subset,
                                             un_sel_ind=un_sel_ind,
                                             di_sel_ind=di_sel_ind)
        if prop.SINGLE_HEAD:
            sel_ind = expert(acq_num, model, init_weights, my_strategies,
                             train_dataset, pool_subset, valid_dataset,
                             test_dataset, device)
            state, action = get_state_action(model,
                                             train_dataset,
                                             pool_subset,
                                             sel_ind=sel_ind)
        if prop.CLUSTERING_AUX_LOSS_HEAD:
            sel_ind = expert(acq_num, model, init_weights, my_strategies,
                             train_dataset, pool_subset, valid_dataset,
                             test_dataset, device)
            state, action = get_state_action(model,
                                             train_dataset,
                                             pool_subset,
                                             sel_ind=sel_ind,
                                             clustering=None)
            # not implemented

        states.append(state)
        actions.append(action)
        if use_learner:
            with torch.no_grad():
                if prop.SINGLE_HEAD:
                    policy_outputs = policy(state.to(device)).flatten()
                    sel_ind = torch.topk(policy_outputs,
                                         prop.ACQ_SIZE)[1].cpu().numpy()
                if prop.CLUSTER_EXPERT_HEAD:
                    policy_output_uncertainty, policy_output_diversity = policy(
                        state.to(device))
                    # clustering_space = policy_output_diversity.reshape(prop.K, prop.POLICY_OUTPUT_SIZE)
                    # one topk for uncertainty, one topk for diversity
                    diversity_selection = torch.topk(
                        policy_output_diversity.reshape(prop.K),
                        int(prop.ACQ_SIZE / 2.0))[1].cpu().numpy()
                    uncertainty_selection = torch.topk(
                        policy_output_uncertainty.reshape(prop.K),
                        int(prop.ACQ_SIZE / 2.0))[1].cpu().numpy()
                    sel_ind = (uncertainty_selection, diversity_selection)
                if prop.CLUSTERING_AUX_LOSS_HEAD:
                    # not implemented
                    policy_outputs = policy(state.to(device)).flatten()
                    sel_ind = torch.topk(policy_outputs,
                                         prop.ACQ_SIZE)[1].cpu().numpy()

        if prop.SINGLE_HEAD:
            q_idxs = subset_ind[sel_ind]  # from subset to full pool
        if prop.CLUSTER_EXPERT_HEAD:
            unified_sel_ind = np.concatenate((sel_ind[0], sel_ind[1]))
            q_idxs = subset_ind[unified_sel_ind]  # from subset to full pool
        remaining_ind = list(set(np.arange(len(pool_dataset))) - set(q_idxs))

        sel_dataset = make_tensordataset(pool_dataset, q_idxs)
        train_dataset = concat_datasets(train_dataset, sel_dataset)
        pool_dataset = make_tensordataset(pool_dataset, remaining_ind)

        test_acc = train_validate_model(model, device, train_dataset,
                                        valid_dataset, test_dataset)

    return states, actions
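run_episode returns the per-acquisition (state, action) pairs used to imitate the expert strategies. A hedged sketch of an outer collection loop (the episode count and beta schedule are assumptions):

# Hypothetical DAgger-style collection; strategies, policy, and device come from the caller.
all_states, all_actions = [], []
for episode in range(10):
    beta = max(0.0, 1.0 - episode / 10)  # lower beta -> learner policy queried more often
    states, actions = run_episode(strategies, policy, beta, device, num_worker=0)
    all_states.extend(states)
    all_actions.extend(actions)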
Example #25
    def __init__(self, env, args, device='cpu'):
        """
        Instantiate an MFEC Agent
        ----------
        env: gym.Env
            gym environment to train on
        args: args class from argparser
            args are from train.py: see train.py for help with each arg
        device: string
            'cpu' or 'cuda:0' depending on use_cuda flag from train.py
        """
        self.environment_type = args.environment_type
        self.env = env
        self.actions = range(self.env.action_space.n)
        self.frames_to_stack = args.frames_to_stack
        self.Q_train_algo = args.Q_train_algo
        self.use_Q_max = args.use_Q_max
        self.force_knn = args.force_knn
        self.weight_neighbors = args.weight_neighbors
        self.delta = args.delta
        self.device = device
        self.rs = np.random.RandomState(args.seed)

        # Hyperparameters
        self.epsilon = args.initial_epsilon
        self.final_epsilon = args.final_epsilon
        self.epsilon_decay = args.epsilon_decay
        self.gamma = args.gamma
        self.lr = args.lr
        self.q_lr = args.q_lr

        # Autoencoder for state embedding network
        self.vae_batch_size = args.vae_batch_size  # batch size for training VAE
        self.vae_epochs = args.vae_epochs  # number of epochs to run VAE
        self.embedding_type = args.embedding_type
        self.SR_embedding_type = args.SR_embedding_type
        self.embedding_size = args.embedding_size
        self.in_height = args.in_height
        self.in_width = args.in_width

        if self.embedding_type == 'VAE':
            self.vae_train_frames = args.vae_train_frames
            self.vae_loss = VAELoss()
            self.vae_print_every = args.vae_print_every
            self.load_vae_from = args.load_vae_from
            self.vae_weights_file = args.vae_weights_file
            self.vae = VAE(self.frames_to_stack, self.embedding_size,
                           self.in_height, self.in_width)
            self.vae = self.vae.to(self.device)
            self.optimizer = get_optimizer(args.optimizer,
                                           self.vae.parameters(), self.lr)
        elif self.embedding_type == 'random':
            self.projection = self.rs.randn(
                self.embedding_size, self.in_height * self.in_width *
                self.frames_to_stack).astype(np.float32)
        elif self.embedding_type == 'SR':
            self.SR_train_algo = args.SR_train_algo
            self.SR_gamma = args.SR_gamma
            self.SR_epochs = args.SR_epochs
            self.SR_batch_size = args.SR_batch_size
            self.n_hidden = args.n_hidden
            self.SR_train_frames = args.SR_train_frames
            self.SR_filename = args.SR_filename
            if self.SR_embedding_type == 'random':
                self.projection = np.random.randn(
                    self.embedding_size,
                    self.in_height * self.in_width).astype(np.float32)
                if self.SR_train_algo == 'TD':
                    self.mlp = MLP(self.embedding_size, self.n_hidden)
                    self.mlp = self.mlp.to(self.device)
                    self.loss_fn = nn.MSELoss(reduction='mean')
                    params = self.mlp.parameters()
                    self.optimizer = get_optimizer(args.optimizer, params,
                                                   self.lr)

        # QEC
        self.max_memory = args.max_memory
        self.num_neighbors = args.num_neighbors
        self.qec = QEC(self.actions, self.max_memory, self.num_neighbors,
                       self.use_Q_max, self.force_knn, self.weight_neighbors,
                       self.delta, self.q_lr)

        #self.state = np.empty(self.embedding_size, self.projection.dtype)
        #self.action = int
        self.memory = []
        self.print_every = args.print_every
        self.episodes = 0
Example #26
class VPGAgent(Agent):
    """
    An agent running online policy gradient. Calling VPGAgent directly uses
    REINFORCE, but it can be subclassed for other policy gradient algorithms.
    """
    def __init__(self, params):
        super(VPGAgent, self).__init__(params)
        self.H = self.params['pg']['H']
        self.lam = self.params['pg']['lam']

        # Initialize policy network
        pol_params = self.params['pg']['pol_params']
        pol_params['input_size'] = self.N
        pol_params['output_size'] = self.M
        if 'final_activation' not in pol_params:
            pol_params['final_activation'] = torch.tanh

        self.pol = MLP(pol_params)

        # Std's are not dependent on state
        init_log_std = -0.8 * torch.ones(self.M)  # ~0.45
        self.log_std = torch.nn.Parameter(init_log_std, requires_grad=True)

        # Create policy optimizer
        ppar = self.params['pg']['pol_optim']
        self.pol_params = list(self.pol.parameters()) + [self.log_std]
        self.pol_optim = torch.optim.Adam(self.pol_params,
                                          lr=ppar['lr'],
                                          weight_decay=ppar['reg'])

        # Create value function and optimizer
        val_params = self.params['pg']['val_params']
        val_params['input_size'] = self.N
        val_params['output_size'] = 1

        self.val = MLP(val_params)

        vpar = self.params['pg']['val_optim']
        self.val_optim = torch.optim.Adam(self.val.parameters(),
                                          lr=vpar['lr'],
                                          weight_decay=vpar['reg'])

        # Logging
        self.hist['ent'] = np.zeros(self.T)

    def get_dist(self, s):
        """
        Create a pytorch normal distribution from
        the policy network for state s.
        """
        s = torch.tensor(s, dtype=self.dtype)
        mu = self.pol.forward(s)
        std = self.log_std.exp()

        return torch.distributions.Normal(mu, std)

    def get_ent(self):
        """
        Return the current entropy (multivariate Gaussian).
        """
        std = self.log_std.exp()
        tpe = 2 * np.pi * np.e
        return .5 * torch.log(tpe * torch.prod(std))

    def get_action(self):
        """
        Gets action by running policy.
        """
        self.pol.eval()

        if self.params['pg']['run_deterministic']:
            x = torch.tensor(self.prev_obs, dtype=self.dtype)
            act = self.pol.forward(x).detach().cpu().numpy()
        else:
            act = sample_pol(self.pol, self.log_std, self.prev_obs)

        act = np.clip(act, self.params['env']['min_act'],
                      self.params['env']['max_act'])

        self.hist['ent'][self.time] = self.get_ent().detach().cpu().numpy()

        return act

    def do_updates(self):
        """
        Performs actor and critic updates.
        """
        if self.time % self.params['pg']['update_every'] == 0 or self.time == 1:
            plan_time = 0
            H, num_rollouts = self.H, self.params['pg']['num_rollouts']
            for i in range(self.params['pg']['num_iter']):
                # Sample rollouts using ground truth model
                check = time.time()
                rollouts = self.sample_rollouts(H, num_rollouts)
                plan_time += time.time() - check

                # Performs value updates alongside advantage calculation
                rews = self.update_pol(rollouts)

            # Time spent generating rollouts should be considered planning time
            self.hist['plan_time'][self.time - 1] += plan_time
            self.hist['update_time'][self.time - 1] -= plan_time

    def sample_rollouts(self, H, num_rollouts):
        """
        Use traj module to sample rollouts using the policy.
        """
        env_state = self.env.sim.get_state() if self.mujoco else None

        self.pol.eval()
        rollouts = traj.generate_trajectories(
            num_rollouts,
            self.env,
            env_state,
            self.prev_obs,
            mujoco=self.mujoco,
            perturb=self.perturb,
            H=self.H,
            gamma=self.gamma,
            act_mode='gauss',
            pt=(sample_pol, self.pol, self.log_std),
            terminal=None,
            tvel=self.tvel,
            num_cpu=self.params['pg']['num_cpu'])

        return rollouts

    def update_val(self, obs, targets):
        """
        Update value function with MSE loss.
        """
        preds = self.val.forward(obs)
        preds = torch.squeeze(preds, dim=-1)

        loss = torch.nn.functional.mse_loss(targets, preds)

        self.val_optim.zero_grad()
        loss.backward(retain_graph=True)
        self.val_optim.step()

        return loss.item()

    def calc_advs(self, obs, rews, update_vals=True):
        """
        Calculate advantages for updating the policy (and, optionally, the
        value function). Can use either rewards-to-go or GAE.
        """
        num_rollouts, H = obs.shape[:2]

        self.val.eval()

        if not self.params['pg']['use_gae']:
            # Calculate terminal values
            fin_obs = obs[:, -1]
            fin_vals = self.val.forward(fin_obs)
            fin_vals = torch.squeeze(fin_vals, dim=-1)

            # Calculate rewards-to-go
            rtg = torch.zeros((num_rollouts, H))
            for k in reversed(range(H)):
                if k < H - 1:
                    rtg[:, k] += self.gamma * rtg[:, k + 1]
                else:
                    rtg[:, k] += self.gamma * fin_vals
                rtg[:, k] += rews[:, k]

            if update_vals:
                self.val.train()
                self.update_val(obs, rtg)

            # Normalize advantages for policy gradient
            for k in range(H):
                rtg[:, k] -= torch.mean(rtg[:, k])

            return rtg

        # Generalized Advantage Estimation (GAE)
        prev_obs = torch.tensor(self.prev_obs, dtype=self.dtype)
        orig_val = self.val.forward(prev_obs)
        vals = torch.squeeze(self.val.forward(obs), dim=-1)

        deltas = torch.zeros(rews.shape)
        advs = torch.zeros((num_rollouts, H))

        lg = self.lam * self.gamma
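        # GAE (Schulman et al., 2016): delta_k = r_k + gamma * V(s_{k+1}) - V(s_k),
        # accumulated backwards in time as A_k = delta_k + gamma * lambda * A_{k+1}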
        for k in reversed(range(H)):
            prev_vals = vals[:, k - 1] if k > 0 else orig_val
            deltas[:, k] = self.gamma * vals[:, k] + rews[:, k] - prev_vals

            if k == H - 1:
                advs[:, k] = deltas[:, k]
            else:
                advs[:, k] = lg * advs[:, k + 1] + deltas[:, k]

        advs = advs.detach()

        # Optionally, also update the value functions
        if update_vals:
            self.val.train()

            # It is reasonable to train on advs or deltas
            dvals = advs

            # Shift values by one step so each delta is added to the value of
            # the state it was computed from, giving one-step value targets
            fvals = torch.stack([orig_val for _ in range(vals.shape[0])],
                                dim=0)
            rets = torch.cat(
                [fvals + dvals[:, :1], vals[:, :-1] + dvals[:, 1:]], dim=-1)
            fobs = torch.unsqueeze(prev_obs, dim=0)
            fobs = torch.stack([fobs for _ in range(vals.shape[0])], dim=0)
            obs = torch.cat([fobs, obs[:, :-1]], dim=1)

            self.update_val(obs, rets)

        # Normalize advantages for policy gradient
        advs -= torch.mean(advs)
        advs /= 1e-3 + torch.std(advs)

        return advs

    def get_pol_loss(self, logprob, advs, orig_logprob=None):
        """
        For REINFORCE, the policy loss is the log-probabilities times the
        advantages. It is important that the logprobs carry the gradient so
        that we can backpropagate through them in the policy update.
        """
        return torch.mean(logprob * advs)
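        # (For reference) the gradient of this objective is the REINFORCE
        # policy-gradient estimate E[grad log pi(a|s) * A(s, a)]; the caller
        # negates it so the optimizer's descent step performs ascent.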

    def get_logprob(self, pol, log_std, obs, acts):
        """
        Get log probabilities for the actions, keeping the gradients.
        """
        num_rollouts, H = obs.shape[0:2]

        pol.train()

        dist = self.get_dist(obs)
        logprob = dist.log_prob(acts).sum(-1)

        return logprob
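        # Assuming get_dist returns a per-dimension Normal, summing over the
        # last axis gives the joint log-prob of the diagonal Gaussian:
        #   log pi(a|s) = sum_i log N(a_i; mu_i(s), sigma_i)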

    def update_pol(self, rollouts, orig_logprob=None):
        """
        Update the policy on the on-policy rollouts.
        """
        H = rollouts[0][0].shape[0]

        self.pol.train()

        obs = np.zeros((len(rollouts), self.H, self.N))
        acts = np.zeros((len(rollouts), self.H, self.M))
        rews = torch.zeros((len(rollouts), self.H))
        for i in range(len(rollouts)):
            for k in range(self.H):
                obs[i, k] = rollouts[i][0][k]
                acts[i, k] = rollouts[i][1][k]
                rews[i, k] = rollouts[i][2][k]

        obs = torch.tensor(obs, dtype=self.dtype)
        acts = torch.tensor(acts, dtype=self.dtype)

        # Perform updates for multiple steps on the value function
        if self.params['pg']['use_gae']:
            for _ in range(self.params['pg']['val_steps']):
                advs = self.calc_advs(obs, rews, update_vals=True)
        else:
            advs = self.calc_advs(obs, rews, update_vals=False)

        # Perform updates for multiple epochs on the policy
        bsize = self.params['pg']['batch_size']
        for _ in range(self.params['pg']['pol_steps']):
            inds = np.random.permutation(len(rollouts))

            binds = inds[:bsize]
            bobs, bacts = obs[binds], acts[binds]
            brews, badvs = rews[binds], advs[binds]

            if orig_logprob is not None:
                bprobs = orig_logprob[binds]
            else:
                bprobs = None

            # Get a logprob that has gradients
            logprob = self.get_logprob(self.pol, self.log_std, bobs, bacts)
            if not self.continue_updates(logprob, bprobs):
                break

            # Compute policy loss; negate so the optimizer's descent step
            # performs gradient ascent on the objective
            J = -self.get_pol_loss(logprob, badvs, orig_logprob=bprobs)

            # Apply entropy bonus
            ent_coef = self.params['pg']['pol_optim']['ent_temp']
            if ent_coef != 0:
                J -= ent_coef * self.get_ent()

            self.pol_optim.zero_grad()
            J.backward()
            # Clip gradients after backprop, before the optimizer step
            torch.nn.utils.clip_grad_norm_(self.pol.parameters(),
                                           self.params['pg']['grad_clip'])
            self.pol_optim.step()

            # Clamp stds to be within set bounds
            log_min = np.log(self.params['pg']['min_std'])
            log_min = torch.tensor(log_min, dtype=self.dtype)
            log_max = np.log(self.params['pg']['max_std'])
            log_max = torch.tensor(log_max, dtype=self.dtype)
            self.log_std.data = torch.clamp(self.log_std.data, log_min,
                                            log_max)

        return rews

    def continue_updates(self, logprob, orig_logprob=None):
        """
        Hook deciding whether to continue policy updates; always returns True here.
        """
        return True

    def print_logs(self):
        """
        Policy gradient-specific logging information.
        """
        bi, ei = super(VPGAgent, self).print_logs()

        self.print('policy gradient metrics', mode='head')

        self.print('entropy avg', np.mean(self.hist['ent'][bi:ei]))
        self.print('sigma avg',
                   np.mean(torch.exp(self.log_std).detach().cpu().numpy()))

        return bi, ei
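
# A minimal sketch (not part of the original example) of the `sample_pol`
# helper referenced above: run the policy to get a mean action and add
# diagonal-Gaussian noise parameterized by `log_std`. The signature is an
# assumption inferred from how it is called in get_action and sample_rollouts.
import numpy as np
import torch


def sample_pol(pol, log_std, obs, dtype=torch.float32):
    x = torch.tensor(np.asarray(obs), dtype=dtype)
    mean = pol.forward(x)                        # mean action from the policy net
    std = torch.exp(log_std)                     # per-dimension standard deviations
    act = mean + std * torch.randn_like(mean)    # sample from N(mean, diag(std^2))
    return act.detach().cpu().numpy()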
Ejemplo n.º 27
0
                                                                val_size=args.val_size,
                                                                random_seed=args.random_seed, )

    os.makedirs('losses/', exist_ok=True)

    if args.model.lower() == 'gbdt':
        from models.GBDT import GBDT
        model = GBDT(depth=args.depth)
        model.fit(X, y, train_mask, val_mask, test_mask,
                  cat_features=cat_features, num_epochs=args.num_epochs, patience=args.patience,
                  learning_rate=args.learning_rate, plot=False, verbose=False,
                  loss_fn=args.loss_fn)

    elif args.model.lower() == 'mlp':
        from models.MLP import MLP
        model = MLP(task=args.task)
        min_rmse_epoch, accuracies = model.fit(X, y, train_mask, val_mask, test_mask,
                                               cat_features=cat_features, num_epochs=args.num_epochs, patience=args.patience,
                                               learning_rate=args.learning_rate, hidden_dim=args.hidden_dim,
                                               logging_epochs=args.logging_steps, loss_fn=args.loss_fn)

        model.plot(accuracies, legend=['Train', 'Val', 'Test'], title='MLP RMSE', output_fn='mlp_losses.pdf')
    elif args.model.lower() == 'gnn':
        from models.GNN import GNN
        model = GNN(heads=args.heads, feat_drop=args.feat_drop, attn_drop=args.attn_drop)

        min_rmse_epoch, accuracies = model.fit(networkx_graph, X, y, train_mask, val_mask, test_mask,
                                               cat_features=cat_features, num_epochs=args.num_epochs, patience=args.patience,
                                               learning_rate=args.learning_rate, hidden_dim=args.hidden_dim, logging_epochs=args.logging_steps,
                                               optimize_node_features=args.input_grad, loss_fn=args.loss_fn)
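
# Illustrative argparse wiring (assumed; not from the original script) covering
# the flags consumed above. Defaults are placeholders only.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model', default='gbdt')
parser.add_argument('--task', default='regression')
parser.add_argument('--depth', type=int, default=6)
parser.add_argument('--num_epochs', type=int, default=1000)
parser.add_argument('--patience', type=int, default=200)
parser.add_argument('--learning_rate', type=float, default=0.01)
parser.add_argument('--hidden_dim', type=int, default=64)
parser.add_argument('--logging_steps', type=int, default=1)
parser.add_argument('--loss_fn', default='rmse')
parser.add_argument('--heads', type=int, default=8)
parser.add_argument('--feat_drop', type=float, default=0.0)
parser.add_argument('--attn_drop', type=float, default=0.0)
parser.add_argument('--input_grad', action='store_true')
parser.add_argument('--val_size', type=float, default=0.2)
parser.add_argument('--random_seed', type=int, default=0)
args = parser.parse_args()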
Ejemplo n.º 28
0
    def classifier_selection(self):
        """
        Function that instantiates classifiers

        :arg
            self (Trainer): instance of the class

        :return
            model (Classifier): Selected model when self.classifier_type is 1
            model1 (Classifier): First selected model when self.classifier_type is 2
            model2 (Classifier): Second selected model when self.classifier_type is 2
            classifier_list (list): List with selected classifier names
        """
        if self.classifier == 'SVM':
            classifier_list = ['SVM']
            if self.classifier_type == 1:
                model = SVMClassifier(cv=self.cross_validation)
            elif self.classifier_type == 2:
                model1 = SVMClassifier(cv=self.cross_validation)
                model2 = SVMClassifier(cv=self.cross_validation)

        elif self.classifier == 'LogisticRegressor':
            classifier_list = ['LogisticRegressor']
            model = LogisticRegressor()
            if self.classifier_type == 1:
                model = LogisticRegressor(cv=self.cross_validation)
            elif self.classifier_type == 2:
                model1 = LogisticRegressor(cv=self.cross_validation)
                model2 = LogisticRegressor(cv=self.cross_validation)

        elif self.classifier == 'MLP':
            classifier_list = ['MLP']
            if self.classifier_type == 1:
                model = MLP(cv=self.cross_validation)
            elif self.classifier_type == 2:
                model1 = MLP(cv=self.cross_validation)
                model2 = MLP(cv=self.cross_validation)

        elif self.classifier == 'RandomForest':
            classifier_list = ['RandomForest']
            model = RandomForest()
            if self.classifier_type == 1:
                model = RandomForest(cv=self.cross_validation)
            elif self.classifier_type == 2:
                model1 = RandomForest(cv=self.cross_validation)
                model2 = RandomForest(cv=self.cross_validation)

        elif self.classifier == 'RBF':
            classifier_list = ['RBF']
            if self.classifier_type == 1:
                model = RBFClassifier(cv=self.cross_validation)
            elif self.classifier_type == 2:
                model1 = RBFClassifier(cv=self.cross_validation)
                model2 = RBFClassifier(cv=self.cross_validation)

        elif self.classifier == 'Fisher':
            classifier_list = ['Fisher']
            if self.classifier_type == 1:
                model = FisherDiscriminant(cv=self.cross_validation)
            elif self.classifier_type == 2:
                model1 = FisherDiscriminant(cv=self.cross_validation)
                model2 = FisherDiscriminant(cv=self.cross_validation)

        elif self.classifier == 'all':
            classifier_list = ['SVM', 'MLP', 'LogisticRegressor', 'RandomForest', 'RBF', 'Fisher']
            if self.classifier_type == 1:
                model_SVM = SVMClassifier(cv=self.cross_validation)
                model_MLP = MLP(cv=self.cross_validation)
                model_Logit = LogisticRegressor(cv=self.cross_validation)
                model_Forest = RandomForest(cv=self.cross_validation)
                model_RBF = RBFClassifier(cv=self.cross_validation)
                model_Fischer = FisherDiscriminant(cv=self.cross_validation)
                model = [model_SVM, model_MLP, model_Logit, model_Forest, model_RBF, model_Fischer]
            elif self.classifier_type == 2:
                model_SVM = SVMClassifier(cv=self.cross_validation)
                model_MLP = MLP(cv=self.cross_validation)
                model_Logit = LogisticRegressor(cv=self.cross_validation)
                model_Forest = RandomForest(cv=self.cross_validation)
                model_RBF = RBFClassifier(cv=self.cross_validation)
                model_Fischer = FisherDiscriminant(cv=self.cross_validation)
                model1 = [model_SVM, model_MLP, model_Logit, model_Forest, model_RBF, model_Fischer]
                model2 = copy.deepcopy(model1)
        else:
            raise SyntaxError('Invalid model name')

        if self.classifier_type == 1:
            return model, classifier_list
        elif self.classifier_type == 2:
            return model1, model2, classifier_list
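
    # Usage sketch (illustrative; assumes a configured Trainer instance):
    #   if trainer.classifier_type == 1:
    #       model, names = trainer.classifier_selection()
    #   elif trainer.classifier_type == 2:
    #       model1, model2, names = trainer.classifier_selection()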
Ejemplo n.º 29
0
def active_learn(exp_num, StrategyClass, subsample):
    # all strategies use same initial training data and model weights
    reinit_seed(prop.RANDOM_SEED)
    test_acc_list = []
    if prop.MODEL.lower() == "mlp":
        model = MLP().apply(weights_init).to(device)
    if prop.MODEL.lower() == "cnn":
        model = CNN().apply(weights_init).to(device)
    if prop.MODEL.lower() == "resnet18":
        model = models.resnet.ResNet18().to(device)
    init_weights = copy.deepcopy(model.state_dict())

    reinit_seed(exp_num * 10)
    dataset_pool, valid_dataset, test_dataset = get_data_splits()
    train_dataset, pool_dataset = stratified_split_dataset(
        dataset_pool, 2 * prop.NUM_CLASSES, prop.NUM_CLASSES)

    # initial data
    strategy = StrategyClass(dataset_pool, valid_dataset, test_dataset, device)
    # calculate the overlap of strategy with other strategies
    strategies = [
        MCDropoutSampling, EnsembleSampling, EntropySampling,
        LeastConfidenceSampling, CoreSetAltSampling, BadgeSampling
    ]
    overlapping_strategies = []
    for StrategyClass in strategies:
        overlapping_strategies.append(
            StrategyClass(dataset_pool, valid_dataset, test_dataset))
    t = trange(1,
               prop.NUM_ACQS + 1,
               desc="Aquisitions (size {})".format(prop.ACQ_SIZE),
               leave=True)
    for acq_num in t:
        model.load_state_dict(init_weights)

        test_acc = train_validate_model(model, device, train_dataset,
                                        valid_dataset, test_dataset)
        test_acc_list.append(test_acc)

        if subsample:
            subset_ind = np.random.choice(a=len(pool_dataset),
                                          size=prop.K,
                                          replace=False)
            pool_subset = make_tensordataset(pool_dataset, subset_ind)
            sel_ind, remain_ind = strategy.query(prop.ACQ_SIZE, model,
                                                 train_dataset, pool_subset)
            q_idxs = subset_ind[sel_ind]  # from subset to full pool
            remaining_ind = list(
                set(np.arange(len(pool_dataset))) - set(q_idxs))
            sel_dataset = make_tensordataset(pool_dataset, q_idxs)
            train_dataset = concat_datasets(train_dataset, sel_dataset)
            pool_dataset = make_tensordataset(pool_dataset, remaining_ind)
        else:
            # all strategies work on k-sized windows in semi-batch setting
            sel_ind, remaining_ind = strategy.query(prop.ACQ_SIZE, model,
                                                    train_dataset,
                                                    pool_dataset)
            sel_dataset = make_tensordataset(pool_dataset, sel_ind)
            pool_dataset = make_tensordataset(pool_dataset, remaining_ind)
            train_dataset = concat_datasets(train_dataset, sel_dataset)

        logging.info(
            "Accuracy for {} sampling and {} acquisition is {}".format(
                strategy.name, acq_num, test_acc))
    return test_acc_list
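
# Illustrative driver (not from the original source): repeat each acquisition
# strategy over a few experiment seeds and keep the per-acquisition accuracies:
#   curves = {S.__name__: [active_learn(i, S, subsample=True) for i in range(3)]
#             for S in (EntropySampling, BadgeSampling)}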
Ejemplo n.º 30
0
def create_model(mode='train', model_type='transformer'):
    if model_type == 'transformer':
        return SpeechTransformer(mode=mode,
                                 drop_rate=hparams.transformer_drop_rate)
    elif model_type == 'mlp':
        return MLP(mode, hparams.mlp_dropout_rate)


class MFEC:
    def __init__(self, env, args, device='cpu'):
        """
        Instantiate an MFEC Agent
        ----------
        env: gym.Env
            gym environment to train on
        args: args class from argparse
            args are from train.py; see train.py for help with each arg
        device: string
            'cpu' or 'cuda:0' depending on use_cuda flag from train.py
        """
        self.environment_type = args.environment_type
        self.env = env
        self.actions = range(self.env.action_space.n)
        self.frames_to_stack = args.frames_to_stack
        self.Q_train_algo = args.Q_train_algo
        self.use_Q_max = args.use_Q_max
        self.force_knn = args.force_knn
        self.weight_neighbors = args.weight_neighbors
        self.delta = args.delta
        self.device = device
        self.rs = np.random.RandomState(args.seed)

        # Hyperparameters
        self.epsilon = args.initial_epsilon
        self.final_epsilon = args.final_epsilon
        self.epsilon_decay = args.epsilon_decay
        self.gamma = args.gamma
        self.lr = args.lr
        self.q_lr = args.q_lr

        # Autoencoder for state embedding network
        self.vae_batch_size = args.vae_batch_size  # batch size for training VAE
        self.vae_epochs = args.vae_epochs  # number of epochs to run VAE
        self.embedding_type = args.embedding_type
        self.SR_embedding_type = args.SR_embedding_type
        self.embedding_size = args.embedding_size
        self.in_height = args.in_height
        self.in_width = args.in_width

        if self.embedding_type == 'VAE':
            self.vae_train_frames = args.vae_train_frames
            self.vae_loss = VAELoss()
            self.vae_print_every = args.vae_print_every
            self.load_vae_from = args.load_vae_from
            self.vae_weights_file = args.vae_weights_file
            self.vae = VAE(self.frames_to_stack, self.embedding_size,
                           self.in_height, self.in_width)
            self.vae = self.vae.to(self.device)
            self.optimizer = get_optimizer(args.optimizer,
                                           self.vae.parameters(), self.lr)
        elif self.embedding_type == 'random':
            self.projection = self.rs.randn(
                self.embedding_size, self.in_height * self.in_width *
                self.frames_to_stack).astype(np.float32)
        elif self.embedding_type == 'SR':
            self.SR_train_algo = args.SR_train_algo
            self.SR_gamma = args.SR_gamma
            self.SR_epochs = args.SR_epochs
            self.SR_batch_size = args.SR_batch_size
            self.n_hidden = args.n_hidden
            self.SR_train_frames = args.SR_train_frames
            self.SR_filename = args.SR_filename
            if self.SR_embedding_type == 'random':
                self.projection = np.random.randn(
                    self.embedding_size,
                    self.in_height * self.in_width).astype(np.float32)
                if self.SR_train_algo == 'TD':
                    self.mlp = MLP(self.embedding_size, self.n_hidden)
                    self.mlp = self.mlp.to(self.device)
                    self.loss_fn = nn.MSELoss(reduction='mean')
                    params = self.mlp.parameters()
                    self.optimizer = get_optimizer(args.optimizer, params,
                                                   self.lr)

        # QEC
        self.max_memory = args.max_memory
        self.num_neighbors = args.num_neighbors
        self.qec = QEC(self.actions, self.max_memory, self.num_neighbors,
                       self.use_Q_max, self.force_knn, self.weight_neighbors,
                       self.delta, self.q_lr)

        #self.state = np.empty(self.embedding_size, self.projection.dtype)
        #self.action = int
        self.memory = []
        self.print_every = args.print_every
        self.episodes = 0

    def choose_action(self, values):
        """
        Choose an action epsilon-greedily according to the Q-value estimates
        """
        # Exploration
        if self.rs.random_sample() < self.epsilon:
            self.action = self.rs.choice(self.actions)

        # Exploitation
        else:
            best_actions = np.argwhere(values == np.max(values)).flatten()
            self.action = self.rs.choice(best_actions)

        return self.action

    def TD_update(self, prev_embedding, prev_action, reward, values, time):
        # On-policy value estimate of the current state (epsilon-greedy)
        # Expected Sarsa
        v_t = (1 -
               self.epsilon) * np.max(values) + self.epsilon * np.mean(values)
        value = reward + self.gamma * v_t
        self.qec.update(prev_embedding, prev_action, value, time - 1)
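        # (For reference) this is the expected-Sarsa backup under the
        # epsilon-greedy policy:
        #   v_t = (1 - eps) * max_a Q(s_t, a) + (eps / |A|) * sum_a Q(s_t, a)
        #   target = r + gamma * v_t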

    def MC_update(self):
        value = 0.0
        for _ in range(len(self.memory)):
            experience = self.memory.pop()
            value = value * self.gamma + experience["reward"]
            self.qec.update(
                experience["state"],
                experience["action"],
                value,
                experience["time"],
            )
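        # (For reference) the backward pass above accumulates the discounted
        # Monte-Carlo return G_t = r_t + gamma * G_{t+1} for each visited
        # (state, action) pair and writes it into the QEC table.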

    def add_to_memory(self, state_embedding, action, reward, time):
        self.memory.append({
            "state": state_embedding,
            "action": action,
            "reward": reward,
            "time": time,
        })

    def run_episode(self):
        """
        Train an MFEC agent for a single episode:
            Interact with environment
            Perform update
        """
        self.episodes += 1
        RENDER_SPEED = 0.04
        RENDER = False

        episode_frames = 0
        total_reward = 0
        total_steps = 0

        # Update epsilon
        if self.epsilon > self.final_epsilon:
            self.epsilon = self.epsilon * self.epsilon_decay

        #self.env.seed(random.randint(0, 1000000))
        state = self.env.reset()
        if self.environment_type == 'fourrooms':
            fewest_steps = self.env.shortest_path_length(self.env.state)
        done = False
        time = 0
        while not done:
            time += 1
            if self.embedding_type == 'random':
                state = np.array(state).flatten()
                state_embedding = np.dot(self.projection, state)
            elif self.embedding_type == 'VAE':
                state = torch.tensor(state).permute(2, 0, 1)  #(H,W,C)->(C,H,W)
                state = state.unsqueeze(0).to(self.device)
                with torch.no_grad():
                    mu, logvar = self.vae.encoder(state)
                    state_embedding = torch.cat([mu, logvar], 1)
                    state_embedding = state_embedding.squeeze()
                    state_embedding = state_embedding.cpu().numpy()
            elif self.embedding_type == 'SR':
                if self.SR_train_algo == 'TD':
                    state = np.array(state).flatten()
                    state_embedding = np.dot(self.projection, state)
                    with torch.no_grad():
                        state_embedding = self.mlp(
                            torch.tensor(state_embedding)).cpu().numpy()
                elif self.SR_train_algo == 'DP':
                    s = self.env.state
                    state_embedding = self.true_SR_dict[s]
            state_embedding = state_embedding / np.linalg.norm(state_embedding)
            if RENDER:
                self.env.render()
                time.sleep(RENDER_SPEED)
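                # NOTE: the local step counter `time` shadows the time module,
                # so this sleep would raise if RENDER were ever enabled.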

            # Get estimated value of each action
            values = [
                self.qec.estimate(state_embedding, action)
                for action in self.actions
            ]

            action = self.choose_action(values)
            state, reward, done, _ = self.env.step(action)
            if self.Q_train_algo == 'MC':
                self.add_to_memory(state_embedding, action, reward, time)
            elif self.Q_train_algo == 'TD':
                if time > 1:
                    self.TD_update(prev_embedding, prev_action, prev_reward,
                                   values, time)
            prev_reward = reward
            prev_embedding = state_embedding
            prev_action = action
            total_reward += reward
            total_steps += 1
            episode_frames += self.env.skip

        if self.Q_train_algo == 'MC':
            self.MC_update()
        if self.episodes % self.print_every == 0:
            print("KNN usage:", np.mean(self.qec.knn_usage))
            self.qec.knn_usage = []
            print("Proportion of replace:", np.mean(self.qec.replace_usage))
            self.qec.replace_usage = []
        if self.environment_type == 'fourrooms':
            n_extra_steps = total_steps - fewest_steps
            return n_extra_steps, episode_frames, total_reward
        else:
            return episode_frames, total_reward

    def warmup(self):
        """
        Warm up the chosen embedding: for 'VAE', collect frames from a random
        policy and train the VAE; for 'SR', train or compute the successor
        representation; random projections need no warmup.
        """
        if self.embedding_type == 'VAE':
            if self.load_vae_from is not None:
                self.vae.load_state_dict(torch.load(self.load_vae_from))
                self.vae = self.vae.to(self.device)
            else:
                # Collect 1 million frames from random policy
                print("Generating dataset to train VAE from random policy")
                vae_data = []
                state = self.env.reset()
                total_frames = 0
                while total_frames < self.vae_train_frames:
                    action = random.randint(0, self.env.action_space.n - 1)
                    state, reward, done, _ = self.env.step(action)
                    vae_data.append(state)
                    total_frames += self.env.skip
                    if done:
                        state = self.env.reset()
                # Dataset, Dataloader for 1 million frames
                vae_data = torch.tensor(
                    vae_data
                )  # (N x H x W x C) - (1mill/skip X 84 X 84 X frames_to_stack)
                vae_data = vae_data.permute(0, 3, 1, 2)  # (N x C x H x W)
                vae_dataset = TensorDataset(vae_data)
                vae_dataloader = DataLoader(vae_dataset,
                                            batch_size=self.vae_batch_size,
                                            shuffle=True)
                # Training loop
                print("Training VAE")
                self.vae.train()
                for epoch in range(self.vae_epochs):
                    train_loss = 0
                    for batch_idx, batch in enumerate(vae_dataloader):
                        batch = batch[0].to(self.device)
                        self.optimizer.zero_grad()
                        recon_batch, mu, logvar = self.vae(batch)
                        loss = self.vae_loss(recon_batch, batch, mu, logvar)
                        train_loss += loss.item()
                        loss.backward()
                        self.optimizer.step()
                        if batch_idx % self.vae_print_every == 0:
                            msg = 'VAE Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
                                epoch, batch_idx * len(batch),
                                len(vae_dataloader.dataset),
                                loss.item() / len(batch))
                            print(msg)
                    print('====> Epoch {} Average loss: {:.4f}'.format(
                        epoch, train_loss / len(vae_dataloader.dataset)))
                    if self.vae_weights_file is not None:
                        torch.save(self.vae.state_dict(),
                                   self.vae_weights_file)
            self.vae.eval()
        elif self.embedding_type == 'SR':
            if self.SR_embedding_type == 'random':
                if self.SR_train_algo == 'TD':
                    total_frames = 0
                    transitions = []
                    while total_frames < self.SR_train_frames:
                        observation = self.env.reset()
                        s_t = self.env.state  # will not work on Atari
                        done = False
                        while not done:
                            action = np.random.randint(self.env.action_space.n)
                            observation, reward, done, _ = self.env.step(
                                action)
                            s_tp1 = self.env.state  # will not work on Atari
                            transitions.append((s_t, s_tp1))
                            total_frames += self.env.skip
                            s_t = s_tp1
                    # Dataset, Dataloader
                    dataset = SRDataset(self.env, self.projection, transitions)
                    dataloader = DataLoader(dataset,
                                            batch_size=self.SR_batch_size,
                                            shuffle=True)
                    train_losses = []
                    # Training loop
                    for epoch in range(self.SR_epochs):
                        for batch_idx, batch in enumerate(dataloader):
                            self.optimizer.zero_grad()
                            e_t, e_tp1 = batch
                            e_t = e_t.to(self.device)
                            e_tp1 = e_tp1.to(self.device)
                            mhat_t = self.mlp(e_t)
                            mhat_tp1 = self.mlp(e_tp1)
                            target = e_t + self.gamma * mhat_tp1.detach()
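                            # The target above is the SR TD bootstrap:
                            #   M(s_t) ~= phi(s_t) + gamma * M(s_{t+1});
                            # detach() keeps gradients out of the bootstrap term.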
                            loss = self.loss_fn(mhat_t, target)
                            loss.backward()
                            self.optimizer.step()
                            train_losses.append(loss.item())
                        print("Epoch:", epoch, "Average loss",
                              np.mean(train_losses))

                    emb_reps = np.zeros(
                        [self.env.n_states, self.embedding_size])
                    SR_reps = np.zeros(
                        [self.env.n_states, self.embedding_size])
                    labels = []
                    room_size = self.env.room_size
                    for i, (state,
                            obs) in enumerate(self.env.state_dict.items()):
                        emb = np.dot(self.projection, obs.flatten())
                        emb_reps[i, :] = emb
                        with torch.no_grad():
                            emb = torch.tensor(emb).to(self.device)
                            SR = self.mlp(emb).cpu().numpy()
                        SR_reps[i, :] = SR
                        if state[0] < room_size + 1 and state[
                                1] < room_size + 1:
                            label = 0
                        elif state[0] > room_size + 1 and state[
                                1] < room_size + 1:
                            label = 1
                        elif state[0] < room_size + 1 and state[
                                1] > room_size + 1:
                            label = 2
                        elif state[0] > room_size + 1 and state[
                                1] > room_size + 1:
                            label = 3
                        else:
                            label = 4
                        labels.append(label)
                    np.save('%s_SR_reps.npy' % (self.SR_filename), SR_reps)
                    np.save('%s_emb_reps.npy' % (self.SR_filename), emb_reps)
                    np.save('%s_labels.npy' % (self.SR_filename), labels)
                elif self.SR_train_algo == 'MC':
                    pass
                elif self.SR_train_algo == 'DP':
                    # Use this to ensure same order every time
                    idx_to_state = {
                        i: state
                        for i, state in enumerate(self.env.state_dict.keys())
                    }
                    state_to_idx = {v: k for k, v in idx_to_state.items()}
                    T = np.zeros([self.env.n_states, self.env.n_states])
                    for i, s in idx_to_state.items():
                        for a in range(4):
                            self.env.state = s
                            _, _, _, _ = self.env.step(a)
                            s_tp1 = self.env.state
                            T[state_to_idx[s], state_to_idx[s_tp1]] += 0.25
                    true_SR = np.eye(self.env.n_states)
                    done = False
                    t = 0
                    while not done:
                        t += 1
                        new_SR = true_SR + (self.SR_gamma**t) * (np.matmul(
                            true_SR, T))
                        done = np.max(np.abs(true_SR - new_SR)) < 1e-10
                        true_SR = new_SR
                    self.true_SR_dict = {}
                    for s, obs in self.env.state_dict.items():
                        idx = state_to_idx[s]
                        self.true_SR_dict[s] = true_SR[idx, :]
        else:
            pass  # random projection doesn't require warmup