def main(arguments):
    # load the features of the dataset
    features = datasets.load_breast_cancer().data

    # standardize the features
    features = StandardScaler().fit_transform(features)

    # get the number of features
    num_features = features.shape[1]

    # load the labels for the features
    labels = datasets.load_breast_cancer().target

    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.30, stratify=labels)

    model = MLP(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, node_size=NUM_NODES,
                num_classes=NUM_CLASSES, num_features=num_features)

    model.train(num_epochs=arguments.num_epochs,
                log_path=arguments.log_path,
                train_data=[train_features, train_labels],
                train_size=train_features.shape[0],
                test_data=[test_features, test_labels],
                test_size=test_features.shape[0],
                result_path=arguments.result_path)

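# A minimal sketch (not from the original script) of the argparse wiring this
# entry point assumes; the flag names simply mirror the attributes `main`
# reads (num_epochs, log_path, result_path) and are otherwise hypothetical.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='MLP on the breast cancer dataset')
    parser.add_argument('--num_epochs', type=int, default=100,
                        help='number of passes over the training set')
    parser.add_argument('--log_path', type=str, default='./logs',
                        help='directory for training logs')
    parser.add_argument('--result_path', type=str, default='./results',
                        help='directory for saved predictions')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())
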
def main(args):
    features, targets = generate_synthetic_data(args.model_type, args.num_samples)

    # split train/test sets
    x_train, x_val, y_train, y_val = train_test_split(features, targets, test_size=0.2)
    db_train = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).batch(args.batch_size_train)
    db_val = tf.data.Dataset.from_tensor_slices(
        (x_val, y_val)).batch(args.batch_size_eval)

    if args.model_type == 'MLP':
        model = MLP(num_inputs=Constants._MLP_NUM_FEATURES,
                    num_layers=Constants._MLP_NUM_LAYERS,
                    num_dims=Constants._MLP_NUM_DIMS,
                    num_outputs=Constants._NUM_TARGETS,
                    dropout_rate=args.dropout)
    elif args.model_type == 'TCN':
        model = TCN(nb_filters=Constants._TCN_NUM_FILTERS,
                    kernel_size=Constants._TCN_KERNEL_SIZE,
                    nb_stacks=Constants._TCN_NUM_STACK,
                    dilations=Constants._TCN_DIALATIONS,
                    padding=Constants._TCN_PADDING,
                    dropout_rate=args.dropout)  # was args.lr; the learning rate is not a dropout rate

    criterion = keras.losses.MeanSquaredError()
    optimizer = keras.optimizers.Adam(learning_rate=args.lr)

    for epoch in range(args.max_epoch):
        for step, (x, y) in enumerate(db_train):
            with tf.GradientTape() as tape:
                logits = model(x)
                loss = criterion(y, logits)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            if step % 100 == 0:
                print('Epoch: {}, Step: {}/{}, Loss: {}'.format(
                    epoch, step, int(x_train.shape[0] / args.batch_size_train), loss))

        # Perform inference and measure the speed every epoch
        start_time = time.time()
        for _, (x, _) in enumerate(db_val):
            _ = model.predict(x)
        end_time = time.time()
        print("Inference speed: {} samples/s\n".format(
            x_val.shape[0] / (end_time - start_time)))

def get_model(format, optimised=True) -> AbstractModel:
    if format == 'LogisticRegression':
        return LogisticRegressionModel(optimised)
    if format == 'RandomForest':
        return RandomForestModel(optimised)
    if format == 'NaiveBayes':
        return NaiveBayes(optimised)
    if format == 'GradientBoosting':
        return GradientBoosting(optimised)
    if format == 'SVM':
        return SVM(optimised)
    if format == 'OneClassSVM':
        return OneClassSVMModel(optimised)
    if format == 'DecisionTree':
        return DecisionTree(optimised)
    if format == 'AdaBoost':
        return AdaBoost(optimised)
    if format == 'GaussianProcess':
        return GaussianProcess(optimised)
    if format == 'MLP':
        return MLP(optimised)
    if format == 'KNeighbors':
        return KNeighbors(optimised)
    if format == 'QuadraticDiscriminant':
        return QuadraticDiscriminant(optimised)
    if format == 'Dummy':
        return Dummy(optimised)
    raise ValueError(format)

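# The chain of `if` checks above can be collapsed into a dict-based registry;
# a sketch under the assumption (which holds above) that every model class
# shares the same `(optimised)` constructor signature.
_MODEL_REGISTRY = {
    'LogisticRegression': LogisticRegressionModel,
    'RandomForest': RandomForestModel,
    'NaiveBayes': NaiveBayes,
    'GradientBoosting': GradientBoosting,
    'SVM': SVM,
    'OneClassSVM': OneClassSVMModel,
    'DecisionTree': DecisionTree,
    'AdaBoost': AdaBoost,
    'GaussianProcess': GaussianProcess,
    'MLP': MLP,
    'KNeighbors': KNeighbors,
    'QuadraticDiscriminant': QuadraticDiscriminant,
    'Dummy': Dummy,
}

def get_model_from_registry(format, optimised=True) -> AbstractModel:
    try:
        return _MODEL_REGISTRY[format](optimised)
    except KeyError:
        raise ValueError(format)
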
def define_model(self, model_name, ps):
    if model_name == 'catboost':
        return GBDTCatBoost(self.task, **ps)
    elif model_name == 'lightgbm':
        return GBDTLGBM(self.task, **ps)
    elif model_name == 'mlp':
        return MLP(self.task, **ps)
    elif model_name == 'gnn':
        return GNN(self.task, **ps)
    elif model_name == 'resgnn':
        # Fit a GBDT first; its predictions become an extra input to the GNN
        gbdt = GBDTCatBoost(self.task)
        gbdt.fit(self.X, self.y,
                 self.train_mask, self.val_mask, self.test_mask,
                 cat_features=self.cat_features,
                 num_epochs=1000, patience=100,
                 plot=False, verbose=False, loss_fn=None,
                 metric_name='loss' if self.task == 'regression' else 'accuracy')
        return GNN(task=self.task, gbdt_predictions=gbdt.model.predict(self.X), **ps)
    elif model_name == 'bgnn':
        return BGNN(self.task, **ps)

def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.metadata = exp_info.metadata

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    batch_size = args.batch_size
    args.batch_size = 1
    feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(
        args, save=True)
    label_num = exp_info.get_label_num(args)

    hidden_size = 256
    hidden_layers = 2

    args.resume = os.path.join(
        paths.checkpoint_root,
        'detection_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate))
    if args.model == 'lstm':
        detection_model = BiLSTM(feature_size, hidden_size, hidden_layers, label_num)
    else:
        detection_model = MLP(feature_size, hidden_size, label_num)
    detection_model = torch.nn.DataParallel(detection_model)
    logutils.load_checkpoint(args, detection_model)

    args.resume = os.path.join(
        paths.checkpoint_root,
        'frame_prediction_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate, args.using_pred_duration))
    if args.model == 'lstm':
        prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num)
    else:
        prediction_model = MLP(feature_size, hidden_size, label_num)
    prediction_model = torch.nn.DataParallel(prediction_model)
    logutils.load_checkpoint(args, prediction_model)

    validate(test_loader, detection_model, prediction_model, args=args)

def get_model(model):
    """ Get Model instance """
    assert model in ['CNN', 'MLP']
    if model == 'CNN':
        return Char_CNN(config, fc_layers, filter_sizes)
    else:
        return MLP(config, fc_layers)

def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.resume = os.path.join(
        paths.checkpoint_root,
        'frame_prediction_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate, args.pred_duration))

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    feature_size, train_loader, val_loader, test_loader, _ = exp_info.get_dataset(args)
    label_num = exp_info.get_label_num(args)
    criterion = torch.nn.CrossEntropyLoss()

    hidden_size = 256
    hidden_layers = 2
    if args.model == 'lstm':
        model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num)
    else:
        model = MLP(feature_size, hidden_size, label_num)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = lr_scheduler.StepLR(optimizer, args.lr_freq, args.lr_decay)
    model = torch.nn.DataParallel(model)
    if args.cuda:
        criterion = criterion.cuda()
        model = model.cuda()
    if args.resume:
        utils.load_checkpoint(args, model, optimizer, scheduler)

    best_prec = 0.0
    if args.eval:
        validate(test_loader, model, args, test=True)
    else:
        for epoch in tqdm(range(args.start_epoch, args.epochs), desc='Epochs Loop'):
            train(train_loader, model, criterion, optimizer, epoch, args)
            prec = validate(val_loader, model, args)
            scheduler.step()
            best_prec = max(prec, best_prec)
            is_best = (best_prec == prec)
            tqdm.write('Best precision: {:.03f}'.format(best_prec))
            if (epoch + 1) % args.save_interval == 0:
                utils.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_prec': best_prec,
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict()
                    }, is_best, args)

def __init__(self, cnn_args, mlp_args):
    super(CNN, self).__init__()

    # embedding layer
    self.embedding_dim = cnn_args['emb_dim']
    self.embedding = nn.Embedding(cnn_args['vocab_size'], self.embedding_dim)

    # initialize with pretrained embeddings
    print("Initializing with pretrained embeddings")
    self.embedding.weight.data.copy_(cnn_args['pretrained_emb'])

    # Dropout definition
    self.dropout = nn.Dropout(0.25)

    # CNN parameters definition
    # Kernel sizes
    self.kernel_1 = 2
    self.kernel_2 = 3
    self.kernel_3 = 4
    self.kernel_4 = 5

    # Input channels for each convolution (the sequence length, since the
    # convolutions run along the embedding dimension)
    self.seq_len = cnn_args['text_len']
    # Output channels (number of kernels) for each convolution
    self.out_channels = cnn_args['num_kernel']
    # Stride for each convolution
    self.stride = cnn_args['stride']

    # Convolution layers definition
    self.conv_1 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_1, self.stride)
    self.conv_2 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_2, self.stride)
    self.conv_3 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_3, self.stride)
    self.conv_4 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_4, self.stride)

    # Max pooling layers definition
    self.pool_1 = nn.MaxPool1d(self.kernel_1, self.stride)
    self.pool_2 = nn.MaxPool1d(self.kernel_2, self.stride)
    self.pool_3 = nn.MaxPool1d(self.kernel_3, self.stride)
    self.pool_4 = nn.MaxPool1d(self.kernel_4, self.stride)

    # MLP classifier
    mlp_input_size = self.in_features_fc()
    self.mlp = MLP(input_size=mlp_input_size,
                   hidden_size=mlp_args['hidden_size'],
                   num_classes=mlp_args['num_classes'])

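# `in_features_fc` is defined elsewhere in the original class; a sketch of the
# usual computation, assuming no padding and dilation 1, so each Conv1d or
# MaxPool1d maps a length L to floor((L - kernel) / stride) + 1. The four
# branch outputs are flattened and concatenated before the MLP.
def _branch_out_len(self, kernel):
    conv_out = (self.embedding_dim - kernel) // self.stride + 1
    pool_out = (conv_out - kernel) // self.stride + 1
    return pool_out

def in_features_fc(self):
    total = 0
    for kernel in (self.kernel_1, self.kernel_2, self.kernel_3, self.kernel_4):
        total += self._branch_out_len(kernel) * self.out_channels
    return total
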
def get_model(args):
    if args.model == "mlp":
        return MLP(args.input_size * 2, args.hidden_size, args.dropout,
                   args.output_size)
    elif args.model == "attention":
        return Attention(args.input_size * 2, args.hidden_size[0], args.layers,
                         args.dropout, args.output_size, gpu=args.gpu)
    elif args.model == 'linear':
        return Linear(args.input_size * 2, args.output_size)
    else:
        assert False, "unknown model: {}".format(args.model)

def get_model(config, args, seq_indexer, label_indexer):
    if config['type'] == 'RNN':
        return TextRNNAttn(embedding_alphabet=seq_indexer, gpu=args.gpu,
                           feat_num=len(label_indexer), **config['model'])
    elif config['type'] == 'CNN':
        return TextCNN(embedding_alphabet=seq_indexer, gpu=args.gpu,
                       feat_num=len(label_indexer), **config['model'])
    elif config['type'] == 'MLP':
        return MLP(embedding_alphabet=seq_indexer, gpu=args.gpu,
                   feat_num=len(label_indexer), **config['model'])
    else:
        raise RuntimeError('no model')

def __init__(self, kernelSize=11, featureSize=1024):
    super().__init__()
    assert kernelSize % 2 == 1, "kernel should be odd"
    # Encoder: two conv + pool stages halve the temporal resolution twice
    self.conv1 = nn.Conv1d(featureSize, 64, kernelSize, padding=kernelSize // 2)
    self.maxpool1 = nn.MaxPool1d(2)
    self.conv2 = nn.Conv1d(64, 96, kernelSize, padding=kernelSize // 2)
    self.maxpool2 = nn.MaxPool1d(2)
    # Decoder: two upsample + conv stages restore the original resolution
    self.upsample1 = nn.Upsample(scale_factor=2, mode="nearest")
    self.conv3 = nn.Conv1d(96, 64, kernelSize, padding=kernelSize // 2)
    self.upsample2 = nn.Upsample(scale_factor=2, mode="nearest")
    self.conv4 = nn.Conv1d(64, featureSize, kernelSize, padding=kernelSize // 2)
    self.classifier = MLP(featureSize)
    self.featureSize = featureSize

def __init__(self, lstm_args, mlp_args):
    super(LSTM, self).__init__()

    # setting hyperparams
    self.hidden_dim = lstm_args['hidden_size']
    self.dropout_prob = lstm_args['dropout']
    self.use_gru = lstm_args['gru']
    self.embedding_dim = lstm_args['emb_dim']

    # embedding layer
    self.embedding = nn.Embedding(lstm_args['vocab_size'], self.embedding_dim)

    # initialize with pretrained word emb if provided
    if 'pretrained_emb' in lstm_args:
        print("Initializing with pretrained embeddings")
        self.embedding.weight.data.copy_(lstm_args['pretrained_emb'])

    # biLSTM layer + dropout
    self.lstm = nn.LSTM(input_size=self.embedding_dim,
                        hidden_size=self.hidden_dim,
                        num_layers=2,
                        batch_first=True,
                        bidirectional=True)
    self.drop = nn.Dropout(p=self.dropout_prob)

    # GRU layer
    self.gru = nn.GRU(input_size=self.embedding_dim,
                      hidden_size=self.hidden_dim,
                      num_layers=2,
                      batch_first=True,
                      bidirectional=True,
                      dropout=self.dropout_prob)

    # fully-connected linear layer (bidirectional RNN doubles the hidden size)
    mlp_input_size = 2 * self.hidden_dim
    self.mlp = MLP(input_size=mlp_input_size,
                   hidden_size=mlp_args['hidden_size'],
                   num_classes=mlp_args['num_classes'])

def __init__(self, params):
    self.params = params
    self.kappa = self.params['kappa']
    self.dtype = self.params['dtype']
    self.device = self.params['device']

    self.models = []
    self.priors = []
    self.optims = []
    for i in range(self.params['ens_size']):
        model = MLP(self.params['model_params']).to(device=self.device)
        self.models.append(model)
        self.optims.append(
            torch.optim.Adam(model.parameters(),
                             lr=self.params['lr'],
                             weight_decay=self.params['reg']))
        # Each ensemble member also gets a fixed, randomly initialized prior
        # network that is never trained
        prior = MLP(self.params['model_params']).to(device=self.device)
        prior.eval()
        self.priors.append(prior)

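# The frozen `priors` follow the randomized-prior-functions idea (Osband et
# al., 2018): each member's output is a trainable net plus a fixed random net,
# so ensemble disagreement persists away from the data. How this class
# combines them is not shown above; a purely hypothetical sketch, with
# `kappa` assumed to scale the prior's contribution:
def forward_member(self, i, x):
    with torch.no_grad():  # the prior is frozen: no gradient flows through it
        prior_out = self.priors[i](x)
    return self.models[i](x) + self.kappa * prior_out
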
class BCAgent(POLOAgent):
    """
    An agent extending upon POLO that uses behavior cloning on the planner
    predicted actions as a prior to MPC.
    """

    def __init__(self, params):
        super(BCAgent, self).__init__(params)

        # Initialize policy network
        pol_params = self.params['p-bc']['pol_params']
        pol_params['input_size'] = self.N
        pol_params['output_size'] = self.M
        if 'final_activation' not in pol_params:
            pol_params['final_activation'] = torch.tanh
        self.pol = MLP(pol_params)

        # Create policy optimizer
        ppar = self.params['p-bc']['pol_optim']
        self.pol_optim = torch.optim.Adam(self.pol.parameters(),
                                          lr=ppar['lr'],
                                          weight_decay=ppar['reg'])

        # Use a replay buffer that will save planner actions
        self.pol_buf = ReplayBuffer(self.N, self.M,
                                    self.params['p-bc']['buf_size'])

        # Logging (store cum_rew, cum_emp_rew)
        self.hist['pols'] = np.zeros((self.T, 2))

        self.has_pol = True
        self.pol_cache = ()

    def get_action(self):
        """
        BCAgent generates a planned trajectory using the behavior-cloned
        policy and then optimizes it via MPC.
        """
        self.pol.eval()

        # Run a rollout using the policy starting from the current state
        infos = self.get_traj_info()
        self.hist['pols'][self.time] = infos[3:5]
        self.pol_cache = (infos[0], infos[2])
        self.prior_actions = infos[1]

        # Generate trajectory via MPC with the prior actions as a prior
        action = super(BCAgent, self).get_action(prior=self.prior_actions)

        # Add final planning trajectory to BC buffer
        fin_states, fin_rews = self.cache[2], self.cache[3]
        fin_states = np.concatenate(([self.prev_obs], fin_states[1:]))
        pb_pct = self.params['p-bc']['pb_pct']
        pb_len = int(pb_pct * fin_states.shape[0])
        for t in range(pb_len):
            self.pol_buf.update(fin_states[t], fin_states[t + 1],
                                fin_rews[t], self.planned_actions[t], False)

        return action

    def do_updates(self):
        """
        Learn from the saved buffer of planned actions.
        """
        super(BCAgent, self).do_updates()
        if self.time % self.params['p-bc']['update_freq'] == 0:
            self.update_pol()

    def update_pol(self):
        """
        Update the policy via BC on the planner actions.
        """
        self.pol.train()
        params = self.params['p-bc']

        # Generate batches for training
        size = min(self.pol_buf.size, self.pol_buf.total_in)
        num_inds = params['batch_size'] * params['grad_steps']
        inds = np.random.randint(0, size, size=num_inds)

        states = self.pol_buf.buffer['s'][inds]
        acts = self.pol_buf.buffer['a'][inds]
        states = torch.tensor(states, dtype=self.dtype)
        actions = torch.tensor(acts, dtype=self.dtype)

        for i in range(params['grad_steps']):
            bi, ei = i * params['batch_size'], (i + 1) * params['batch_size']

            # Train based on L2 distance between actions and predictions
            preds = self.pol.forward(states[bi:ei])
            preds = torch.squeeze(preds, dim=-1)
            targets = torch.squeeze(actions[bi:ei], dim=-1)
            loss = torch.nn.functional.mse_loss(preds, targets)

            self.pol_optim.zero_grad()
            loss.backward()
            self.pol_optim.step()

    def get_traj_info(self):
        """
        Run the policy for a full trajectory and return details about the
        trajectory.
        """
        env_state = self.env.sim.get_state() if self.mujoco else None
        infos = traj.eval_traj(copy.deepcopy(self.env), env_state,
                               self.prev_obs, mujoco=self.mujoco,
                               perturb=self.perturb, H=self.H,
                               gamma=self.gamma, act_mode='deter',
                               pt=(self.pol, 0), terminal=self.val_ens,
                               tvel=self.tvel)
        return infos

    def print_logs(self):
        """
        BC-specific logging information.
        """
        bi, ei = super(BCAgent, self).print_logs()
        self.print('BC metrics', mode='head')
        self.print('policy traj rew', self.hist['pols'][self.time - 1][0])
        self.print('policy traj emp rew', self.hist['pols'][self.time - 1][1])
        return bi, ei

    def test_policy(self):
        """
        Run the BC action selection mechanism.
        """
        env = copy.deepcopy(self.env)
        obs = env.reset()
        if self.tvel is not None:
            env.set_target_vel(self.tvel)
            obs = env._get_obs()
        env_state = env.sim.get_state() if self.mujoco else None
        infos = traj.eval_traj(env, env_state, obs, mujoco=self.mujoco,
                               perturb=self.perturb, H=self.eval_len,
                               gamma=1, act_mode='deter', pt=(self.pol, 0),
                               tvel=self.tvel)
        self.hist['pol_test'][self.time] = infos[3]

'dev'], datasets['test']
seq_indexer = SeqIndexerBaseEmbeddings("glove", args.embedding_dir,
                                       args.embedding_dim, ' ')
seq_indexer.load_embeddings_from_file()
label_indexer = SeqIndexerBase("label", False, False)
label_indexer.add_instance(dataset.train_label)

if args.load is not None:
    model = torch.load(args.load)
    if args.gpu >= 0:
        model.cuda(device=args.gpu)
else:
    if args.model == 'MLP':
        model = MLP(embedding_indexer=seq_indexer, gpu=args.gpu,
                    feat_num=len(label_indexer),
                    dropout=args.dropout_rate)
    elif args.model == 'CNN':
        model = TextCNN(embedding_indexer=seq_indexer, gpu=args.gpu,
                        feat_num=len(label_indexer),
                        dropout=args.dropout_rate,
                        kernel_size=[2, 3, 5])

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate,
                             betas=(0.9, 0.999), eps=1e-08,
                             weight_decay=0, amsgrad=False)

parser.add_argument('-v', '--validation', dest='val', type=float, default=10.0,
                    help='Percent of the data that is used as validation (0-100)')
return parser.parse_args()


if __name__ == '__main__':
    args = get_args()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = MLP(1, 3)
    if args.load:
        net.load_state_dict(torch.load(args.load, map_location=device))  # was net.lead_state_dict, a typo
    net.to(device=device)
    try:
        train_net(net=net,
                  epochs=args.epochs,
                  batch_size=args.batchsize,
                  lr=args.lr,
                  device=device,
                  val_percent=args.val / 100)
    except KeyboardInterrupt:
        torch.save(net.state_dict(), 'INTERRUPTED.pth')

def main():
    global opt
    opt = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    # Set up logging
    if opt.savepath is None:
        path = os.path.join('save', datetime.datetime.now().strftime("%d-%H-%M-%S"))
    else:
        path = opt.savepath
    os.makedirs(path, exist_ok=True)
    logger = utils.Logger(path)

    # Keep track of accuracies
    val_accuracies = []
    test_accuracies = []

    # Seed for cross-val split
    seed = random.randint(0, 10000) if opt.seed < 0 else opt.seed
    logger.log('SEED: {}'.format(seed), stdout=False)

    # Load data
    if opt.preloaded_splits.lower() == 'none':
        start = time.time()
        data, label = get_data(opt.data, opt.label)
        logger.log('Data loaded in {:.1f}s\n'.format(time.time() - start))
    else:
        data, label = np.zeros(5), np.zeros(5)  # dummy labels for iterating over
        logger.log('Using preloaded splits\n')

    # Create cross-validation splits
    kf = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)

    # Cross validate
    for i, (train_index, test_index) in enumerate(kf.split(data, label)):

        # Log split
        logger.log('------------- SPLIT {} --------------\n'.format(i + 1))

        # Train / test split (ignored if opt.preloaded_splits is not 'none')
        X, X_test = data[train_index], data[test_index]
        y, y_test = label[train_index], label[test_index]

        # Perform PCA and generate dataloader or load from saved file
        start = time.time()
        apply_pca_transform = (opt.arch not in ['exp'])
        train_loader, val_loader, test_loader, pca_components, input_size, num_classes, pca_matrix = \
            get_dataloader(opt.preloaded_splits, X, X_test, y, y_test,
                           batch_size=opt.b, val_fraction=opt.val_fraction,
                           pca_components=opt.pca_components,
                           apply_pca_transform=apply_pca_transform,
                           imputation_dim=opt.impute, split=i,
                           save_dataset=(not opt.no_save_dataset))
        logger.log('Dataloader loaded in {:.1f}s\n'.format(time.time() - start))

        # Model
        arch = opt.arch.lower()
        assert arch in ['logreg', 'mlp', 'exp']
        if arch == 'logreg':
            model = LogisticRegression(input_size, opt.pca_components, num_classes)
        elif arch == 'mlp':
            model = MLP(input_size, opt.hidden_size, num_classes, opt.dp)
        elif arch == 'exp':
            model = ExperimentalModel(input_size, opt.pca_components,
                                      opt.hidden_size, num_classes, opt.dp)

        # Pretrained / Initialization
        if opt.model is not None and os.path.isfile(opt.model):
            # Pretrained model
            model.load_state_dict(torch.load(opt.model))
            logger.log('Loaded pretrained model.', stdout=(i == 0))
        else:
            # Initialize model uniformly
            for p in model.parameters():
                p.data.uniform_(-0.1, 0.1)
            logger.log('Initialized model from scratch.', stdout=(i == 0))
        model = model.cuda() if use_gpu else model
        print(model)

        # Initialize first layer with PCA and fix PCA weights if the model requires it
        if opt.arch in ['exp']:
            model.first_layer.weight.data.copy_(pca_matrix)
            logger.log('Initialized first layer as PCA', stdout=(i == 0))
            if not opt.finetune_pca:
                model.first_layer.weight.requires_grad = False
                logger.log('Fixed PCA weights', stdout=(i == 0))

        # Loss function and optimizer
        criterion = nn.CrossEntropyLoss(reduction='sum')  # was size_average=False (deprecated)
        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=opt.lr, weight_decay=opt.wd)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 'max', patience=opt.lr_decay_patience,
            factor=opt.lr_decay_factor, verbose=True,
            cooldown=opt.lr_decay_cooldown)

        # Log parameters
        logger.log('COMMAND LINE ARGS: ' + ' '.join(sys.argv), stdout=False)
        logger.log('ARGS: {}\nOPTIMIZER: {}\nLEARNING RATE: {}\nSCHEDULER: {}\nMODEL: {}\n'.format(
            opt, optimizer, opt.lr, vars(scheduler), model), stdout=False)

        # If specified, only evaluate the model
        if opt.evaluate:
            assert opt.model is not None, 'no pretrained model to evaluate'
            total_correct, total, _ = validate(model, val_loader, criterion)
            logger.log('Accuracy: {:.3f} \t Total correct: {} \t Total: {}'.format(
                total_correct / total, total_correct, total))
            return

        # Train model
        start_time = time.time()
        best_acc = train(model, train_loader, val_loader, optimizer, criterion,
                         logger, num_epochs=opt.epochs,
                         print_freq=opt.print_freq, model_id=i)
        logger.log('Best train accuracy: {:.2f}% \t Finished split {} in {:.2f}s\n'.format(
            100 * best_acc, i + 1, time.time() - start_time))
        val_accuracies.append(best_acc)

        # Best evaluation on validation set
        best_model_path = os.path.join(path, 'model_{}.pth'.format(i))
        model.load_state_dict(torch.load(best_model_path))  # load best model
        total_correct, total, _ = validate(model, val_loader, criterion)  # check val set
        logger.log('Val Accuracy: {:.3f} \t Total correct: {} \t Total: {}'.format(
            total_correct / total, total_correct, total))

        # Optionally also evaluate on test set
        if opt.test:
            total_correct, total, visualize = validate(model, test_loader,
                                                       criterion, visualize=True)  # run test set
            logger.log('Test Accuracy: {:.3f} \t Total correct: {} \t Total: {}\n'.format(
                total_correct / total, total_correct, total))
            logger.save_model(visualize, 'visualize_{}.pth'.format(i))
            test_accuracies.append(total_correct / total)

    # Log after training
    logger.log('Val Accuracies: {}'.format(val_accuracies))
    logger.log('Test Accuracies: {}'.format(test_accuracies))
    logger.log('Run id: {} \t Test Accuracies: {}'.format(opt.id, test_accuracies))

def __init__(self, args):  # was `cfg`, but the body reads `args` throughout
    trainLoader, valLoader = get_dataloaders(
        args.trainNormalFolder, args.trainNormalAnnotations,
        args.trainAbnormalFolder, args.trainAbnormalAnnotations,
        args.trainNormalTopK, args.valNormalFolder, args.valNormalAnnotations,
        args.valAbnormalFolder, args.valAbnormalAnnotations, args.valNormalTopK,
        args.batchSize, args.numWorkers, args.model, args.windowSize,
        args.subWindows, args.featureSize, args.maxVideoSize)

    self.modelType = args.model
    self.trainLoader = trainLoader
    self.valLoader = valLoader
    self.expFolder = args.expFolder
    self.maskValue = args.maskValue
    self.stepCounter = 0
    self.bestAUC = 0
    self.noNormalSegmentation = args.noNormalSegmentation
    self.lossType = args.loss

    if args.model == "mlp":
        self.model = MLP(featureSize=args.featureSize)
    elif args.model == "tcn":
        self.model = EDTCN(featureSize=args.featureSize, kernelSize=args.kernelSize)
    elif args.model == "mstcn":
        self.model = MultiStageModel(num_stages=args.numStages,
                                     num_layers=args.numLayers,
                                     num_f_maps=args.numFeatureMaps,
                                     dim=args.featureSize,
                                     ssRepeat=args.firstStageRepeat)
        print("[Info] MS-TCN W{}-S{}-L{} has been created".format(
            args.windowSize, args.numStages, args.numLayers))
    # elif args.model == "mcbtcn":
    #     self.model = MultiClassBinaryTCN(numClassStages=args.numClassStages,
    #                                      numBinaryStages=args.numBinaryStages,
    #                                      num_layers=args.numLayers,
    #                                      num_f_maps=args.numFeatureMaps,
    #                                      dim=args.featureSize, numClasses=16)

    self.model = self.model.float()
    # if torch.cuda.is_available():
    #     self.model = self.model.cuda()

    if args.optimizer == "adam":
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=args.learningRate,
                                          betas=(0.5, 0.9), eps=1e-08,
                                          weight_decay=0, amsgrad=False)
    self.scheduler = torch.optim.lr_scheduler.StepLR(
        self.optimizer, step_size=args.schedulerStepSize,
        gamma=args.schedulerGamma)

    if args.modelPath:
        self.loadCheckpoint(args.modelPath)
        print("[Info] Model has been loaded from {}".format(args.modelPath))

    if torch.cuda.is_available():
        self.model = self.model.cuda()
    self.model = self.model.float()

    self.ceLoss = torch.nn.CrossEntropyLoss(ignore_index=-1)
    self.ASLoss = TemporalHardPairLoss(max_violation=True,
                                       margin=args.adLossMargin,
                                       measure="output")
    self.mseLoss = torch.nn.MSELoss()
    self.lossLambda = args.adLossLambda

    self.writer = None
    if not args.test:
        self.writer = SummaryWriter(log_dir=args.expFolder)

def choose_model(conf, G, features, labels, byte_idx_train, labels_one_hot):
    if conf['model_name'] == 'GCN':
        model = GCN(g=G,
                    in_feats=features.shape[1],
                    n_hidden=conf['hidden'],
                    n_classes=labels.max().item() + 1,
                    n_layers=1,
                    activation=F.relu,
                    dropout=conf['dropout']).to(conf['device'])
    elif conf['model_name'] == 'GAT':
        num_heads = 8
        num_layers = 1
        num_out_heads = 1
        heads = ([num_heads] * num_layers) + [num_out_heads]
        model = GAT(g=G,
                    num_layers=num_layers,
                    in_dim=G.ndata['feat'].shape[1],
                    num_hidden=8,
                    num_classes=labels.max().item() + 1,
                    heads=heads,
                    activation=F.relu,
                    feat_drop=0.6,
                    attn_drop=0.6,
                    negative_slope=0.2,  # negative slope of leaky relu
                    residual=False).to(conf['device'])
    elif conf['model_name'] == 'PLP':
        model = PLP(g=G,
                    num_layers=conf['num_layers'],
                    in_dim=G.ndata['feat'].shape[1],
                    emb_dim=conf['emb_dim'],
                    num_classes=labels.max().item() + 1,
                    activation=F.relu,
                    feat_drop=conf['feat_drop'],
                    attn_drop=conf['attn_drop'],
                    residual=False,
                    byte_idx_train=byte_idx_train,
                    labels_one_hot=labels_one_hot,
                    ptype=conf['ptype'],
                    mlp_layers=conf['mlp_layers']).to(conf['device'])
    elif conf['model_name'] == 'GraphSAGE':
        model = GraphSAGE(in_feats=G.ndata['feat'].shape[1],
                          n_hidden=16,
                          n_classes=labels.max().item() + 1,
                          n_layers=1,
                          activation=F.relu,
                          dropout=0.5,
                          aggregator_type=conf['agg_type']).to(conf['device'])
    elif conf['model_name'] == 'APPNP':
        model = APPNP(g=G,
                      in_feats=G.ndata['feat'].shape[1],
                      hiddens=[64],
                      n_classes=labels.max().item() + 1,
                      activation=F.relu,
                      feat_drop=0.5,
                      edge_drop=0.5,
                      alpha=0.1,
                      k=10).to(conf['device'])
    elif conf['model_name'] == 'LogReg':
        # Logistic regression is a one-layer MLP with no hidden units
        model = MLP(num_layers=1,
                    input_dim=G.ndata['feat'].shape[1],
                    hidden_dim=None,
                    output_dim=labels.max().item() + 1,
                    dropout=0).to(conf['device'])
    elif conf['model_name'] == 'MLP':
        model = MLP(num_layers=2,
                    input_dim=G.ndata['feat'].shape[1],
                    hidden_dim=conf['hidden'],
                    output_dim=labels.max().item() + 1,
                    dropout=conf['dropout']).to(conf['device'])
    else:
        raise ValueError(f"Undefined model: {conf['model_name']}")
    return model

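# Example invocation (hypothetical values; only the keys that the chosen
# branch actually reads above need to be present in `conf`):
conf = {
    'model_name': 'MLP',
    'hidden': 64,
    'dropout': 0.5,
    'device': 'cpu',
}
model = choose_model(conf, G, features, labels, byte_idx_train, labels_one_hot)
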
def get_model(self, model_cfg):
    model = MLP(featureSize=model_cfg.feature_size)
    return model

def run_episode(strategies, policy, beta, device, num_worker):
    states, actions = [], []

    # all strategies use same initial training data and model weights
    reinit_seed(prop.RANDOM_SEED)

    if prop.MODEL == "MLP":
        model = MLP().apply(weights_init).to(device)
    if prop.MODEL == "CNN":
        model = CNN().apply(weights_init).to(device)
    if prop.MODEL == "RESNET18":
        model = models.resnet.ResNet18().to(device)

    init_weights = deepcopy(model.state_dict())

    # re-init seed was here before
    use_learner = True if np.random.rand(1) > beta else False
    if use_learner:
        policy = policy.to(device)  # load policy only when learner is used for states

    dataset_pool, valid_dataset, test_dataset = get_policy_training_splits()
    train_dataset, pool_dataset = stratified_split_dataset(
        dataset_pool, prop.INIT_SIZE, prop.NUM_CLASSES)  # Initial sampling

    if prop.SINGLE_HEAD:
        my_strategies = []
        for StrategyClass in strategies:
            my_strategies.append(
                StrategyClass(dataset_pool, valid_dataset, test_dataset))

    if prop.CLUSTER_EXPERT_HEAD:
        UncertaintyStrategieClasses, DiversityStrategieClasses = strategies
        un_strategies = []
        di_strategies = []
        for StrategyClass in UncertaintyStrategieClasses:
            un_strategies.append(
                StrategyClass(dataset_pool, valid_dataset, test_dataset))
        for StrategyClass in DiversityStrategieClasses:
            di_strategies.append(
                StrategyClass(dataset_pool, valid_dataset, test_dataset))

    if prop.CLUSTERING_AUX_LOSS_HEAD:
        my_strategies = []
        for StrategyClass in strategies:
            my_strategies.append(
                StrategyClass(dataset_pool, valid_dataset, test_dataset))

    init_acc = train_validate_model(model, device, train_dataset,
                                    valid_dataset, test_dataset)

    t = trange(1, prop.NUM_ACQS + 1,
               desc="Acquisitions (size {})".format(prop.ACQ_SIZE),
               leave=True)
    for acq_num in t:
        subset_ind = np.random.choice(a=len(pool_dataset), size=prop.K,
                                      replace=False)
        pool_subset = make_tensordataset(pool_dataset, subset_ind)

        if prop.CLUSTER_EXPERT_HEAD:
            un_sel_ind = expert(acq_num, model, init_weights, un_strategies,
                                train_dataset, pool_subset, valid_dataset,
                                test_dataset, device)
            di_sel_ind = expert(acq_num, model, init_weights, di_strategies,
                                train_dataset, pool_subset, valid_dataset,
                                test_dataset, device)  # was un_strategies, which ignored the diversity experts
            state, action = get_state_action(model, train_dataset, pool_subset,
                                             un_sel_ind=un_sel_ind,
                                             di_sel_ind=di_sel_ind)
        if prop.SINGLE_HEAD:
            sel_ind = expert(acq_num, model, init_weights, my_strategies,
                             train_dataset, pool_subset, valid_dataset,
                             test_dataset, device)
            state, action = get_state_action(model, train_dataset, pool_subset,
                                             sel_ind=sel_ind)
        if prop.CLUSTERING_AUX_LOSS_HEAD:
            sel_ind = expert(acq_num, model, init_weights, my_strategies,
                             train_dataset, pool_subset, valid_dataset,
                             test_dataset, device)
            state, action = get_state_action(model, train_dataset, pool_subset,
                                             sel_ind=sel_ind,
                                             clustering=None)  # not implemented

        states.append(state)
        actions.append(action)

        if use_learner:
            with torch.no_grad():
                if prop.SINGLE_HEAD:
                    policy_outputs = policy(state.to(device)).flatten()
                    sel_ind = torch.topk(policy_outputs,
                                         prop.ACQ_SIZE)[1].cpu().numpy()
                if prop.CLUSTER_EXPERT_HEAD:
                    policy_output_uncertainty, policy_output_diversity = policy(
                        state.to(device))
                    # clustering_space = policy_output_diversity.reshape(prop.K, prop.POLICY_OUTPUT_SIZE)
                    # one topk for uncertainty, one topk for diversity
                    diversity_selection = torch.topk(
                        policy_output_diversity.reshape(prop.K),
                        int(prop.ACQ_SIZE / 2.0))[1].cpu().numpy()
                    uncertainty_selection = torch.topk(
                        policy_output_uncertainty.reshape(prop.K),
                        int(prop.ACQ_SIZE / 2.0))[1].cpu().numpy()
                    sel_ind = (uncertainty_selection, diversity_selection)
                if prop.CLUSTERING_AUX_LOSS_HEAD:  # not implemented
                    policy_outputs = policy(state.to(device)).flatten()
                    sel_ind = torch.topk(policy_outputs,
                                         prop.ACQ_SIZE)[1].cpu().numpy()

        if prop.SINGLE_HEAD:
            q_idxs = subset_ind[sel_ind]  # from subset to full pool
        if prop.CLUSTER_EXPERT_HEAD:
            unified_sel_ind = np.concatenate((sel_ind[0], sel_ind[1]))
            q_idxs = subset_ind[unified_sel_ind]  # from subset to full pool

        remaining_ind = list(set(np.arange(len(pool_dataset))) - set(q_idxs))
        sel_dataset = make_tensordataset(pool_dataset, q_idxs)
        train_dataset = concat_datasets(train_dataset, sel_dataset)
        pool_dataset = make_tensordataset(pool_dataset, remaining_ind)

        test_acc = train_validate_model(model, device, train_dataset,
                                        valid_dataset, test_dataset)

    return states, actions

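# `use_learner` implements DAgger-style stochastic mixing: with probability
# beta the episode is driven by the expert heuristics, otherwise by the
# learned policy, while expert selections are always recorded as labels.
# Decaying beta over iterations hands control to the learner; a common
# schedule (an assumption, not shown in the original) is exponential decay:
def beta_schedule(iteration, beta0=1.0, decay=0.9):
    return beta0 * (decay ** iteration)
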
class VPGAgent(Agent):
    """
    An agent running online policy gradient. VPGAgent itself uses REINFORCE,
    but it can be subclassed for other policy gradient algorithms.
    """

    def __init__(self, params):
        super(VPGAgent, self).__init__(params)
        self.H = self.params['pg']['H']
        self.lam = self.params['pg']['lam']

        # Initialize policy network
        pol_params = self.params['pg']['pol_params']
        pol_params['input_size'] = self.N
        pol_params['output_size'] = self.M
        if 'final_activation' not in pol_params:
            pol_params['final_activation'] = torch.tanh
        self.pol = MLP(pol_params)

        # Stds are not dependent on state
        init_log_std = -0.8 * torch.ones(self.M)  # std ~0.45
        self.log_std = torch.nn.Parameter(init_log_std, requires_grad=True)

        # Create policy optimizer
        ppar = self.params['pg']['pol_optim']
        self.pol_params = list(self.pol.parameters()) + [self.log_std]
        self.pol_optim = torch.optim.Adam(self.pol_params,
                                          lr=ppar['lr'],
                                          weight_decay=ppar['reg'])

        # Create value function and optimizer
        val_params = self.params['pg']['val_params']
        val_params['input_size'] = self.N
        val_params['output_size'] = 1
        self.val = MLP(val_params)
        vpar = self.params['pg']['val_optim']
        self.val_optim = torch.optim.Adam(self.val.parameters(),
                                          lr=vpar['lr'],
                                          weight_decay=vpar['reg'])

        # Logging
        self.hist['ent'] = np.zeros(self.T)

    def get_dist(self, s):
        """
        Create a pytorch normal distribution from the policy network for
        state s.
        """
        s = torch.tensor(s, dtype=self.dtype)
        mu = self.pol.forward(s)
        std = self.log_std.exp()
        return torch.distributions.Normal(mu, std)

    def get_ent(self):
        """
        Return the current entropy (multivariate Gaussian).
        """
        std = self.log_std.exp()
        tpe = 2 * np.pi * np.e
        return .5 * torch.log(tpe * torch.prod(std))

    def get_action(self):
        """
        Get an action by running the policy.
        """
        self.pol.eval()
        if self.params['pg']['run_deterministic']:
            x = torch.tensor(self.prev_obs, dtype=self.dtype)
            act = self.pol.forward(x).detach().cpu().numpy()
        else:
            act = sample_pol(self.pol, self.log_std, self.prev_obs)
        act = np.clip(act, self.params['env']['min_act'],
                      self.params['env']['max_act'])
        self.hist['ent'][self.time] = self.get_ent().detach().cpu().numpy()
        return act

    def do_updates(self):
        """
        Perform actor and critic updates.
        """
        if self.time % self.params['pg']['update_every'] == 0 or self.time == 1:
            plan_time = 0
            H, num_rollouts = self.H, self.params['pg']['num_rollouts']
            for i in range(self.params['pg']['num_iter']):
                # Sample rollouts using ground truth model
                check = time.time()
                rollouts = self.sample_rollouts(H, num_rollouts)
                plan_time += time.time() - check

                # Performs value updates alongside advantage calculation
                rews = self.update_pol(rollouts)

            # Time spent generating rollouts should be considered planning time
            self.hist['plan_time'][self.time - 1] += plan_time
            self.hist['update_time'][self.time - 1] -= plan_time

    def sample_rollouts(self, H, num_rollouts):
        """
        Use the traj module to sample rollouts using the policy.
        """
        env_state = self.env.sim.get_state() if self.mujoco else None
        self.pol.eval()
        rollouts = traj.generate_trajectories(
            num_rollouts, self.env, env_state, self.prev_obs,
            mujoco=self.mujoco, perturb=self.perturb, H=self.H,
            gamma=self.gamma, act_mode='gauss',
            pt=(sample_pol, self.pol, self.log_std), terminal=None,
            tvel=self.tvel, num_cpu=self.params['pg']['num_cpu'])
        return rollouts

    def update_val(self, obs, targets):
        """
        Update the value function with an MSE loss.
        """
        preds = self.val.forward(obs)
        preds = torch.squeeze(preds, dim=-1)
        loss = torch.nn.functional.mse_loss(targets, preds)
        self.val_optim.zero_grad()
        loss.backward(retain_graph=True)
        self.val_optim.step()
        return loss.item()

    def calc_advs(self, obs, rews, update_vals=True):
        """
        Calculate advantages for updating the policy (and optionally update
        the value function). Can use either rewards-to-go or GAE.
        """
        num_rollouts, H = obs.shape[:2]
        self.val.eval()

        if not self.params['pg']['use_gae']:
            # Calculate terminal values
            fin_obs = obs[:, -1]
            fin_vals = self.val.forward(fin_obs)
            fin_vals = torch.squeeze(fin_vals, dim=-1)

            # Calculate rewards-to-go
            rtg = torch.zeros((num_rollouts, H))
            for k in reversed(range(H)):
                if k < H - 1:
                    rtg[:, k] += self.gamma * rtg[:, k + 1]
                else:
                    rtg[:, k] += self.gamma * fin_vals
                rtg[:, k] += rews[:, k]

            if update_vals:
                self.val.train()
                self.update_val(obs, rtg)

            # Normalize advantages for policy gradient
            for k in range(H):
                rtg[:, k] -= torch.mean(rtg[:, k])
            return rtg

        # Generalized Advantage Estimation (GAE)
        prev_obs = torch.tensor(self.prev_obs, dtype=self.dtype)
        orig_val = self.val.forward(prev_obs)
        vals = torch.squeeze(self.val.forward(obs), dim=-1)

        deltas = torch.zeros(rews.shape)
        advs = torch.zeros((num_rollouts, H))
        lg = self.lam * self.gamma
        for k in reversed(range(H)):
            prev_vals = vals[:, k - 1] if k > 0 else orig_val
            deltas[:, k] = self.gamma * vals[:, k] + rews[:, k] - prev_vals
            if k == H - 1:
                advs[:, k] = deltas[:, k]
            else:
                advs[:, k] = lg * advs[:, k + 1] + deltas[:, k]
        advs = advs.detach()

        # Optionally, also update the value function
        if update_vals:
            self.val.train()
            # It is reasonable to train on advs or deltas
            dvals = advs
            # Have to perform trick to match deltas with prev vals
            fvals = torch.stack([orig_val for _ in range(vals.shape[0])], dim=0)
            rets = torch.cat(
                [fvals + dvals[:, :1], vals[:, :-1] + dvals[:, 1:]], dim=-1)
            fobs = torch.unsqueeze(prev_obs, dim=0)
            fobs = torch.stack([fobs for _ in range(vals.shape[0])], dim=0)
            obs = torch.cat([fobs, obs[:, :-1]], dim=1)
            self.update_val(obs, rets)

        # Normalize advantages for policy gradient
        advs -= torch.mean(advs)
        advs /= 1e-3 + torch.std(advs)
        return advs

    def get_pol_loss(self, logprob, advs, orig_logprob=None):
        """
        For REINFORCE, the policy loss is the log-probs times the advantages.
        It is important that the log-probs carry the gradient so that we can
        backpropagate through them in the policy update.
        """
        return torch.mean(logprob * advs)

    def get_logprob(self, pol, log_std, obs, acts):
        """
        Get log probabilities for the actions, keeping the gradients.
        """
        num_rollouts, H = obs.shape[0:2]
        pol.train()
        dist = self.get_dist(obs)
        logprob = dist.log_prob(acts).sum(-1)
        return logprob

    def update_pol(self, rollouts, orig_logprob=None):
        """
        Update the policy on the on-policy rollouts.
        """
        H = rollouts[0][0].shape[0]
        self.pol.train()

        obs = np.zeros((len(rollouts), self.H, self.N))
        acts = np.zeros((len(rollouts), self.H, self.M))
        rews = torch.zeros((len(rollouts), self.H))
        for i in range(len(rollouts)):
            for k in range(self.H):
                obs[i, k] = rollouts[i][0][k]
                acts[i, k] = rollouts[i][1][k]
                rews[i, k] = rollouts[i][2][k]
        obs = torch.tensor(obs, dtype=self.dtype)
        acts = torch.tensor(acts, dtype=self.dtype)

        # Perform updates for multiple steps on the value function
        if self.params['pg']['use_gae']:
            for _ in range(self.params['pg']['val_steps']):
                advs = self.calc_advs(obs, rews, update_vals=True)
        else:
            advs = self.calc_advs(obs, rews, update_vals=False)

        # Perform updates for multiple epochs on the policy
        bsize = self.params['pg']['batch_size']
        for _ in range(self.params['pg']['pol_steps']):
            inds = np.random.permutation(len(rollouts))
            binds = inds[:bsize]
            bobs, bacts = obs[binds], acts[binds]
            brews, badvs = rews[binds], advs[binds]
            if orig_logprob is not None:
                bprobs = orig_logprob[binds]
            else:
                bprobs = None

            # Get a logprob that has gradients
            logprob = self.get_logprob(self.pol, self.log_std, bobs, bacts)
            if not self.continue_updates(logprob, bprobs):
                break

            # Compute policy loss (negated, i.e. gradient ascent)
            J = -self.get_pol_loss(logprob, badvs, orig_logprob=bprobs)

            # Apply entropy bonus
            ent_coef = self.params['pg']['pol_optim']['ent_temp']
            if ent_coef != 0:
                J -= ent_coef * self.get_ent()

            self.pol_optim.zero_grad()
            J.backward()
            # Clip after backward so that the gradients being clipped exist
            # (the original clipped before backward, which had no effect)
            torch.nn.utils.clip_grad_norm_(self.pol.parameters(),
                                           self.params['pg']['grad_clip'])
            self.pol_optim.step()

        # Clamp stds to be within set bounds
        log_min = np.log(self.params['pg']['min_std'])
        log_min = torch.tensor(log_min, dtype=self.dtype)
        log_max = np.log(self.params['pg']['max_std'])
        log_max = torch.tensor(log_max, dtype=self.dtype)
        self.log_std.data = torch.clamp(self.log_std.data, log_min, log_max)

        return rews

    def continue_updates(self, logprob, orig_logprob=None):
        """
        Hook for deciding whether or not to continue updates.
        """
        return True

    def print_logs(self):
        """
        Policy gradient-specific logging information.
        """
        bi, ei = super(VPGAgent, self).print_logs()
        self.print('policy gradient metrics', mode='head')
        self.print('entropy avg', np.mean(self.hist['ent'][bi:ei]))
        self.print('sigma avg',
                   np.mean(torch.exp(self.log_std).detach().cpu().numpy()))
        return bi, ei

    val_size=args.val_size,
    random_seed=args.random_seed,
)

os.makedirs('losses/', exist_ok=True)

if args.model.lower() == 'gbdt':
    from models.GBDT import GBDT
    model = GBDT(depth=args.depth)
    model.fit(X, y, train_mask, val_mask, test_mask,
              cat_features=cat_features,
              num_epochs=args.num_epochs,  # was args.num_features, a typo
              patience=args.patience,
              learning_rate=args.learning_rate,
              plot=False, verbose=False,
              loss_fn=args.loss_fn)
elif args.model.lower() == 'mlp':
    from models.MLP import MLP
    model = MLP(task=args.task)
    min_rmse_epoch, accuracies = model.fit(X, y, train_mask, val_mask, test_mask,
                                           cat_features=cat_features,
                                           num_epochs=args.num_epochs,
                                           patience=args.patience,
                                           learning_rate=args.learning_rate,
                                           hidden_dim=args.hidden_dim,
                                           logging_epochs=args.logging_steps,
                                           loss_fn=args.loss_fn)
    model.plot(accuracies, legend=['Train', 'Val', 'Test'], title='MLP RMSE',
               output_fn='mlp_losses.pdf')
elif args.model.lower() == 'gnn':
    from models.GNN import GNN
    model = GNN(heads=args.heads, feat_drop=args.feat_drop,
                attn_drop=args.attn_drop)
    min_rmse_epoch, accuracies = model.fit(networkx_graph, X, y, train_mask,
                                           val_mask, test_mask,
                                           cat_features=cat_features,
                                           num_epochs=args.num_epochs,
                                           patience=args.patience,
                                           learning_rate=args.learning_rate,
                                           hidden_dim=args.hidden_dim,
                                           logging_epochs=args.logging_steps,
                                           optimize_node_features=args.input_grad,
                                           loss_fn=args.loss_fn)

def classifier_selection(self):
    """
    Function that instantiates classifiers

    :arg self (Trainer): instance of the class
    :return model (Classifier): Selected model when self.classifier_type is 1
            model1 (Classifier): First selected model when self.classifier_type is 2
            model2 (Classifier): Second selected model when self.classifier_type is 2
            classifier_list (list): List with selected classifier names
    """
    if self.classifier == 'SVM':
        classifier_list = ['SVM']
        if self.classifier_type == 1:
            model = SVMClassifier(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = SVMClassifier(cv=self.cross_validation)
            model2 = SVMClassifier(cv=self.cross_validation)
    elif self.classifier == 'LogisticRegressor':
        classifier_list = ['LogisticRegressor']
        if self.classifier_type == 1:
            model = LogisticRegressor(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = LogisticRegressor(cv=self.cross_validation)
            model2 = LogisticRegressor(cv=self.cross_validation)
    elif self.classifier == 'MLP':
        classifier_list = ['MLP']
        if self.classifier_type == 1:
            model = MLP(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = MLP(cv=self.cross_validation)
            model2 = MLP(cv=self.cross_validation)
    elif self.classifier == 'RandomForest':
        classifier_list = ['RandomForest']
        if self.classifier_type == 1:
            model = RandomForest(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = RandomForest(cv=self.cross_validation)
            model2 = RandomForest(cv=self.cross_validation)
    elif self.classifier == 'RBF':
        classifier_list = ['RBF']
        if self.classifier_type == 1:
            model = RBFClassifier(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = RBFClassifier(cv=self.cross_validation)
            model2 = RBFClassifier(cv=self.cross_validation)
    elif self.classifier == 'Fisher':
        classifier_list = ['Fisher']
        if self.classifier_type == 1:
            model = FisherDiscriminant(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = FisherDiscriminant(cv=self.cross_validation)
            model2 = FisherDiscriminant(cv=self.cross_validation)
    elif self.classifier == 'all':
        # 'Fisher' was previously misspelled 'Fischer' here, which did not
        # match the name checked in the branch above
        classifier_list = ['SVM', 'MLP', 'LogisticRegressor', 'RandomForest',
                           'RBF', 'Fisher']
        if self.classifier_type == 1:
            model_SVM = SVMClassifier(cv=self.cross_validation)
            model_MLP = MLP(cv=self.cross_validation)
            model_Logit = LogisticRegressor(cv=self.cross_validation)
            model_Forest = RandomForest(cv=self.cross_validation)
            model_RBF = RBFClassifier(cv=self.cross_validation)
            model_Fisher = FisherDiscriminant(cv=self.cross_validation)
            model = [model_SVM, model_MLP, model_Logit, model_Forest,
                     model_RBF, model_Fisher]
        elif self.classifier_type == 2:
            model_SVM = SVMClassifier(cv=self.cross_validation)
            model_MLP = MLP(cv=self.cross_validation)
            model_Logit = LogisticRegressor(cv=self.cross_validation)
            model_Forest = RandomForest(cv=self.cross_validation)
            model_RBF = RBFClassifier(cv=self.cross_validation)
            model_Fisher = FisherDiscriminant(cv=self.cross_validation)
            model1 = [model_SVM, model_MLP, model_Logit, model_Forest,
                      model_RBF, model_Fisher]
            model2 = copy.deepcopy(model1)
    else:
        raise SyntaxError('Invalid model name')

    if self.classifier_type == 1:
        return model, classifier_list
    elif self.classifier_type == 2:
        return model1, model2, classifier_list

def active_learn(exp_num, StrategyClass, subsample):
    # all strategies use same initial training data and model weights
    reinit_seed(prop.RANDOM_SEED)
    test_acc_list = []

    if prop.MODEL.lower() == "mlp":
        model = MLP().apply(weights_init).to(device)
    if prop.MODEL.lower() == "cnn":
        model = CNN().apply(weights_init).to(device)
    if prop.MODEL.lower() == "resnet18":
        model = models.resnet.ResNet18().to(device)

    init_weights = copy.deepcopy(model.state_dict())

    reinit_seed(exp_num * 10)
    dataset_pool, valid_dataset, test_dataset = get_data_splits()
    train_dataset, pool_dataset = stratified_split_dataset(
        dataset_pool, 2 * prop.NUM_CLASSES, prop.NUM_CLASSES)  # initial data

    strategy = StrategyClass(dataset_pool, valid_dataset, test_dataset, device)

    # calculate the overlap of the strategy with other strategies
    strategies = [
        MCDropoutSampling, EnsembleSampling, EntropySampling,
        LeastConfidenceSampling, CoreSetAltSampling, BadgeSampling
    ]
    overlapping_strategies = []
    for StrategyClass in strategies:
        overlapping_strategies.append(
            StrategyClass(dataset_pool, valid_dataset, test_dataset))

    t = trange(1, prop.NUM_ACQS + 1,
               desc="Acquisitions (size {})".format(prop.ACQ_SIZE),
               leave=True)
    for acq_num in t:
        model.load_state_dict(init_weights)
        test_acc = train_validate_model(model, device, train_dataset,
                                        valid_dataset, test_dataset)
        test_acc_list.append(test_acc)

        if subsample:
            subset_ind = np.random.choice(a=len(pool_dataset), size=prop.K,
                                          replace=False)
            pool_subset = make_tensordataset(pool_dataset, subset_ind)
            sel_ind, remain_ind = strategy.query(prop.ACQ_SIZE, model,
                                                 train_dataset, pool_subset)
            q_idxs = subset_ind[sel_ind]  # from subset to full pool
            remaining_ind = list(set(np.arange(len(pool_dataset))) - set(q_idxs))
            sel_dataset = make_tensordataset(pool_dataset, q_idxs)
            train_dataset = concat_datasets(train_dataset, sel_dataset)
            pool_dataset = make_tensordataset(pool_dataset, remaining_ind)
        else:
            # all strategies work on k-sized windows in the semi-batch setting
            sel_ind, remaining_ind = strategy.query(prop.ACQ_SIZE, model,
                                                    train_dataset, pool_dataset)
            sel_dataset = make_tensordataset(pool_dataset, sel_ind)
            pool_dataset = make_tensordataset(pool_dataset, remaining_ind)
            train_dataset = concat_datasets(train_dataset, sel_dataset)

        logging.info("Accuracy for {} sampling and {} acquisition is {}".format(
            strategy.name, acq_num, test_acc))

    return test_acc_list

def create_model(mode='train', model_type='transformer'):
    if model_type == 'transformer':
        return SpeechTransformer(mode=mode,
                                 drop_rate=hparams.transformer_drop_rate)
    elif model_type == 'mlp':
        return MLP(mode, hparams.mlp_dropout_rate)

class MFEC: def __init__(self, env, args, device='cpu'): """ Instantiate an MFEC Agent ---------- env: gym.Env gym environment to train on args: args class from argparser args are from from train.py: see train.py for help with each arg device: string 'cpu' or 'cuda:0' depending on use_cuda flag from train.py """ self.environment_type = args.environment_type self.env = env self.actions = range(self.env.action_space.n) self.frames_to_stack = args.frames_to_stack self.Q_train_algo = args.Q_train_algo self.use_Q_max = args.use_Q_max self.force_knn = args.force_knn self.weight_neighbors = args.weight_neighbors self.delta = args.delta self.device = device self.rs = np.random.RandomState(args.seed) # Hyperparameters self.epsilon = args.initial_epsilon self.final_epsilon = args.final_epsilon self.epsilon_decay = args.epsilon_decay self.gamma = args.gamma self.lr = args.lr self.q_lr = args.q_lr # Autoencoder for state embedding network self.vae_batch_size = args.vae_batch_size # batch size for training VAE self.vae_epochs = args.vae_epochs # number of epochs to run VAE self.embedding_type = args.embedding_type self.SR_embedding_type = args.SR_embedding_type self.embedding_size = args.embedding_size self.in_height = args.in_height self.in_width = args.in_width if self.embedding_type == 'VAE': self.vae_train_frames = args.vae_train_frames self.vae_loss = VAELoss() self.vae_print_every = args.vae_print_every self.load_vae_from = args.load_vae_from self.vae_weights_file = args.vae_weights_file self.vae = VAE(self.frames_to_stack, self.embedding_size, self.in_height, self.in_width) self.vae = self.vae.to(self.device) self.optimizer = get_optimizer(args.optimizer, self.vae.parameters(), self.lr) elif self.embedding_type == 'random': self.projection = self.rs.randn( self.embedding_size, self.in_height * self.in_width * self.frames_to_stack).astype(np.float32) elif self.embedding_type == 'SR': self.SR_train_algo = args.SR_train_algo self.SR_gamma = args.SR_gamma self.SR_epochs = args.SR_epochs self.SR_batch_size = args.SR_batch_size self.n_hidden = args.n_hidden self.SR_train_frames = args.SR_train_frames self.SR_filename = args.SR_filename if self.SR_embedding_type == 'random': self.projection = np.random.randn( self.embedding_size, self.in_height * self.in_width).astype(np.float32) if self.SR_train_algo == 'TD': self.mlp = MLP(self.embedding_size, self.n_hidden) self.mlp = self.mlp.to(self.device) self.loss_fn = nn.MSELoss(reduction='mean') params = self.mlp.parameters() self.optimizer = get_optimizer(args.optimizer, params, self.lr) # QEC self.max_memory = args.max_memory self.num_neighbors = args.num_neighbors self.qec = QEC(self.actions, self.max_memory, self.num_neighbors, self.use_Q_max, self.force_knn, self.weight_neighbors, self.delta, self.q_lr) #self.state = np.empty(self.embedding_size, self.projection.dtype) #self.action = int self.memory = [] self.print_every = args.print_every self.episodes = 0 def choose_action(self, values): """ Choose epsilon-greedy policy according to Q-estimates """ # Exploration if self.rs.random_sample() < self.epsilon: self.action = self.rs.choice(self.actions) # Exploitation else: best_actions = np.argwhere(values == np.max(values)).flatten() self.action = self.rs.choice(best_actions) return self.action def TD_update(self, prev_embedding, prev_action, reward, values, time): # On-policy value estimate of current state (epsiloln-greedy) # Expected Sarsa v_t = (1 - self.epsilon) * np.max(values) + self.epsilon * np.mean(values) value = reward + self.gamma * v_t 
self.qec.update(prev_embedding, prev_action, value, time - 1) def MC_update(self): value = 0.0 for _ in range(len(self.memory)): experience = self.memory.pop() value = value * self.gamma + experience["reward"] self.qec.update( experience["state"], experience["action"], value, experience["time"], ) def add_to_memory(self, state_embedding, action, reward, time): self.memory.append({ "state": state_embedding, "action": action, "reward": reward, "time": time, }) def run_episode(self): """ Train an MFEC agent for a single episode: Interact with environment Perform update """ self.episodes += 1 RENDER_SPEED = 0.04 RENDER = False episode_frames = 0 total_reward = 0 total_steps = 0 # Update epsilon if self.epsilon > self.final_epsilon: self.epsilon = self.epsilon * self.epsilon_decay #self.env.seed(random.randint(0, 1000000)) state = self.env.reset() if self.environment_type == 'fourrooms': fewest_steps = self.env.shortest_path_length(self.env.state) done = False time = 0 while not done: time += 1 if self.embedding_type == 'random': state = np.array(state).flatten() state_embedding = np.dot(self.projection, state) elif self.embedding_type == 'VAE': state = torch.tensor(state).permute(2, 0, 1) #(H,W,C)->(C,H,W) state = state.unsqueeze(0).to(self.device) with torch.no_grad(): mu, logvar = self.vae.encoder(state) state_embedding = torch.cat([mu, logvar], 1) state_embedding = state_embedding.squeeze() state_embedding = state_embedding.cpu().numpy() elif self.embedding_type == 'SR': if self.SR_train_algo == 'TD': state = np.array(state).flatten() state_embedding = np.dot(self.projection, state) with torch.no_grad(): state_embedding = self.mlp( torch.tensor(state_embedding)).cpu().numpy() elif self.SR_train_algo == 'DP': s = self.env.state state_embedding = self.true_SR_dict[s] state_embedding = state_embedding / np.linalg.norm(state_embedding) if RENDER: self.env.render() time.sleep(RENDER_SPEED) # Get estimated value of each action values = [ self.qec.estimate(state_embedding, action) for action in self.actions ] action = self.choose_action(values) state, reward, done, _ = self.env.step(action) if self.Q_train_algo == 'MC': self.add_to_memory(state_embedding, action, reward, time) elif self.Q_train_algo == 'TD': if time > 1: self.TD_update(prev_embedding, prev_action, prev_reward, values, time) prev_reward = reward prev_embedding = state_embedding prev_action = action total_reward += reward total_steps += 1 episode_frames += self.env.skip if self.Q_train_algo == 'MC': self.MC_update() if self.episodes % self.print_every == 0: print("KNN usage:", np.mean(self.qec.knn_usage)) self.qec.knn_usage = [] print("Proportion of replace:", np.mean(self.qec.replace_usage)) self.qec.replace_usage = [] if self.environment_type == 'fourrooms': n_extra_steps = total_steps - fewest_steps return n_extra_steps, episode_frames, total_reward else: return episode_frames, total_reward def warmup(self): """ Collect 1 million frames from random policy and train VAE """ if self.embedding_type == 'VAE': if self.load_vae_from is not None: self.vae.load_state_dict(torch.load(self.load_vae_from)) self.vae = self.vae.to(self.device) else: # Collect 1 million frames from random policy print("Generating dataset to train VAE from random policy") vae_data = [] state = self.env.reset() total_frames = 0 while total_frames < self.vae_train_frames: action = random.randint(0, self.env.action_space.n - 1) state, reward, done, _ = self.env.step(action) vae_data.append(state) total_frames += self.env.skip if done: state = self.env.reset() # 
    def run_episode(self):
        """
        Train the MFEC agent for a single episode:
        interact with the environment, then perform the Q update.
        """
        self.episodes += 1
        RENDER_SPEED = 0.04
        RENDER = False

        episode_frames = 0
        total_reward = 0
        total_steps = 0

        # Decay epsilon
        if self.epsilon > self.final_epsilon:
            self.epsilon = self.epsilon * self.epsilon_decay

        #self.env.seed(random.randint(0, 1000000))
        state = self.env.reset()
        if self.environment_type == 'fourrooms':
            fewest_steps = self.env.shortest_path_length(self.env.state)
        done = False
        step = 0  # renamed from `time` so it no longer shadows the time module
        while not done:
            step += 1
            if self.embedding_type == 'random':
                state = np.array(state).flatten()
                state_embedding = np.dot(self.projection, state)
            elif self.embedding_type == 'VAE':
                state = torch.tensor(state).permute(2, 0, 1)  # (H,W,C) -> (C,H,W)
                state = state.unsqueeze(0).to(self.device)
                with torch.no_grad():
                    mu, logvar = self.vae.encoder(state)
                state_embedding = torch.cat([mu, logvar], 1)
                state_embedding = state_embedding.squeeze()
                state_embedding = state_embedding.cpu().numpy()
            elif self.embedding_type == 'SR':
                if self.SR_train_algo == 'TD':
                    state = np.array(state).flatten()
                    state_embedding = np.dot(self.projection, state)
                    with torch.no_grad():
                        state_embedding = self.mlp(
                            torch.tensor(state_embedding)).cpu().numpy()
                elif self.SR_train_algo == 'DP':
                    s = self.env.state
                    state_embedding = self.true_SR_dict[s]
                state_embedding = state_embedding / np.linalg.norm(state_embedding)
            if RENDER:
                self.env.render()
                time.sleep(RENDER_SPEED)
            # Get the estimated value of each action
            values = [
                self.qec.estimate(state_embedding, action)
                for action in self.actions
            ]
            action = self.choose_action(values)
            state, reward, done, _ = self.env.step(action)
            if self.Q_train_algo == 'MC':
                self.add_to_memory(state_embedding, action, reward, step)
            elif self.Q_train_algo == 'TD':
                if step > 1:
                    self.TD_update(prev_embedding, prev_action,
                                   prev_reward, values, step)
                prev_reward = reward
                prev_embedding = state_embedding
                prev_action = action
            total_reward += reward
            total_steps += 1
            episode_frames += self.env.skip

        if self.Q_train_algo == 'MC':
            self.MC_update()

        if self.episodes % self.print_every == 0:
            print("KNN usage:", np.mean(self.qec.knn_usage))
            self.qec.knn_usage = []
            print("Proportion of replace:", np.mean(self.qec.replace_usage))
            self.qec.replace_usage = []

        if self.environment_type == 'fourrooms':
            n_extra_steps = total_steps - fewest_steps
            return n_extra_steps, episode_frames, total_reward
        else:
            return episode_frames, total_reward

    def warmup(self):
        """Collect frames from a random policy and pre-train the state
        embedding (VAE or successor representation); a plain random
        projection needs no warmup."""
        if self.embedding_type == 'VAE':
            if self.load_vae_from is not None:
                self.vae.load_state_dict(torch.load(self.load_vae_from))
                self.vae = self.vae.to(self.device)
            else:
                # Collect self.vae_train_frames frames from a random policy
                print("Generating dataset to train VAE from random policy")
                vae_data = []
                state = self.env.reset()
                total_frames = 0
                while total_frames < self.vae_train_frames:
                    action = random.randint(0, self.env.action_space.n - 1)
                    state, reward, done, _ = self.env.step(action)
                    vae_data.append(state)
                    total_frames += self.env.skip
                    if done:
                        state = self.env.reset()
                # Dataset / DataLoader over the collected frames
                vae_data = torch.tensor(vae_data)        # (N, H, W, C), N = vae_train_frames / skip
                vae_data = vae_data.permute(0, 3, 1, 2)  # (N, C, H, W)
                vae_dataset = TensorDataset(vae_data)
                vae_dataloader = DataLoader(vae_dataset,
                                            batch_size=self.vae_batch_size,
                                            shuffle=True)
                # Training loop
                print("Training VAE")
                self.vae.train()
                for epoch in range(self.vae_epochs):
                    train_loss = 0
                    for batch_idx, batch in enumerate(vae_dataloader):
                        batch = batch[0].to(self.device)
                        self.optimizer.zero_grad()
                        recon_batch, mu, logvar = self.vae(batch)
                        loss = self.vae_loss(recon_batch, batch, mu, logvar)
                        train_loss += loss.item()
                        loss.backward()
                        self.optimizer.step()
                        if batch_idx % self.vae_print_every == 0:
                            msg = 'VAE Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
                                epoch, batch_idx * len(batch),
                                len(vae_dataloader.dataset),
                                loss.item() / len(batch))
                            print(msg)
                    print('====> Epoch {} Average loss: {:.4f}'.format(
                        epoch, train_loss / len(vae_dataloader.dataset)))
                if self.vae_weights_file is not None:
                    torch.save(self.vae.state_dict(), self.vae_weights_file)
            self.vae.eval()
        elif self.embedding_type == 'SR':
            if self.SR_embedding_type == 'random':
                if self.SR_train_algo == 'TD':
                    # Collect state transitions from a random policy
                    total_frames = 0
                    transitions = []
                    while total_frames < self.SR_train_frames:
                        observation = self.env.reset()
                        s_t = self.env.state  # will not work on Atari
                        done = False
                        while not done:
                            action = np.random.randint(self.env.action_space.n)
                            observation, reward, done, _ = self.env.step(action)
                            s_tp1 = self.env.state  # will not work on Atari
                            transitions.append((s_t, s_tp1))
                            total_frames += self.env.skip
                            s_t = s_tp1
                    # Dataset / DataLoader over the transitions
                    dataset = SRDataset(self.env, self.projection, transitions)
                    dataloader = DataLoader(dataset,
                                            batch_size=self.SR_batch_size,
                                            shuffle=True)
                    # Training loop: TD(0) on the successor representation,
                    # with target e(s_t) + gamma * m(s_{t+1})
                    train_losses = []
                    for epoch in range(self.SR_epochs):
                        for batch_idx, batch in enumerate(dataloader):
                            self.optimizer.zero_grad()
                            e_t, e_tp1 = batch
                            e_t = e_t.to(self.device)
                            e_tp1 = e_tp1.to(self.device)
                            mhat_t = self.mlp(e_t)
                            mhat_tp1 = self.mlp(e_tp1)
                            target = e_t + self.gamma * mhat_tp1.detach()
                            loss = self.loss_fn(mhat_t, target)
                            loss.backward()
                            self.optimizer.step()
                            train_losses.append(loss.item())
                        print("Epoch:", epoch, "Average loss", np.mean(train_losses))
                    # Save raw embeddings, learned SRs, and room labels for
                    # later analysis (label 4 = states in neither quadrant,
                    # i.e. hallways)
                    emb_reps = np.zeros([self.env.n_states, self.embedding_size])
                    SR_reps = np.zeros([self.env.n_states, self.embedding_size])
                    labels = []
                    room_size = self.env.room_size
                    for i, (state, obs) in enumerate(self.env.state_dict.items()):
                        emb = np.dot(self.projection, obs.flatten())
                        emb_reps[i, :] = emb
                        with torch.no_grad():
                            SR = self.mlp(torch.tensor(emb).to(self.device))
                            SR_reps[i, :] = SR.cpu().numpy()
                        if state[0] < room_size + 1 and state[1] < room_size + 1:
                            label = 0
                        elif state[0] > room_size + 1 and state[1] < room_size + 1:
                            label = 1
                        elif state[0] < room_size + 1 and state[1] > room_size + 1:
                            label = 2
                        elif state[0] > room_size + 1 and state[1] > room_size + 1:
                            label = 3
                        else:
                            label = 4
                        labels.append(label)
                    np.save('%s_SR_reps.npy' % (self.SR_filename), SR_reps)
                    np.save('%s_emb_reps.npy' % (self.SR_filename), emb_reps)
                    np.save('%s_labels.npy' % (self.SR_filename), labels)
                elif self.SR_train_algo == 'MC':
                    pass
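                # The DP branch below computes the exact successor
                # representation as the truncated series SR = sum_t gamma^t T^t,
                # which for gamma < 1 converges to (I - gamma * T)^(-1).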
                elif self.SR_train_algo == 'DP':
                    # Enumerate states in a fixed order so indices are
                    # reproducible across runs
                    idx_to_state = {
                        i: state
                        for i, state in enumerate(self.env.state_dict.keys())
                    }
                    state_to_idx = {v: k for k, v in idx_to_state.items()}
                    # Transition matrix under the uniform-random policy
                    T = np.zeros([self.env.n_states, self.env.n_states])
                    for i, s in idx_to_state.items():
                        for a in range(4):
                            self.env.state = s
                            _, _, _, _ = self.env.step(a)
                            s_tp1 = self.env.state
                            T[state_to_idx[s], state_to_idx[s_tp1]] += 0.25
                    # Accumulate SR = sum_t gamma^t T^t until convergence.
                    # The power T^t is tracked separately here; the original
                    # multiplied the running sum by T, which drifts away from
                    # the intended series.
                    true_SR = np.eye(self.env.n_states)
                    T_power = np.eye(self.env.n_states)
                    done = False
                    t = 0
                    while not done:
                        t += 1
                        T_power = np.matmul(T_power, T)
                        new_SR = true_SR + (self.SR_gamma ** t) * T_power
                        done = np.max(np.abs(true_SR - new_SR)) < 1e-10
                        true_SR = new_SR
                    self.true_SR_dict = {}
                    for s, obs in self.env.state_dict.items():
                        idx = state_to_idx[s]
                        self.true_SR_dict[s] = true_SR[idx, :]
        else:
            pass  # a plain random projection requires no warmup
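
# Minimal usage sketch for the class above. Everything named here is an
# assumption for illustration: parse_args and make_env are hypothetical
# stand-ins for whatever train.py actually does, as is the args.episodes
# flag; only the MFEC calls mirror the class as written. Note that
# run_episode returns three values (extra steps, frames, reward) on the
# 'fourrooms' environment and two otherwise.
def train_mfec_sketch():
    args = parse_args()                   # hypothetical argparse helper
    env = make_env(args)                  # hypothetical gym env factory
    device = 'cuda:0' if args.use_cuda else 'cpu'
    agent = MFEC(env, args, device=device)
    agent.warmup()                        # pre-train the VAE / SR embedding, if any
    for episode in range(args.episodes):  # hypothetical flag name
        episode_frames, total_reward = agent.run_episode()
        print('Episode {}: frames={}, reward={}'.format(
            episode, episode_frames, total_reward))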