def __init__(self, state_size, action_size, seed):
    """Construct a DQN agent.

    params:
        - state_size (int) : dimension of each state.
        - action_size (int) : dimension of each action.
        - seed (int) : random seed.
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)
    # Online and target Q-networks share the same architecture and seed;
    # the target net is updated from the online net during learning.
    self.qnet = FCNet(state_size, action_size, seed).to(device)
    self.target_qnet = FCNet(state_size, action_size, seed).to(device)
    # Adam on the online network only (target net is never optimized directly).
    self.optimizer = optim.Adam(self.qnet.parameters(), lr=LR)
    # Experience replay buffer (capacity/batch come from module-level constants).
    self.buffer = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
    # Step counter used to trigger a learning update every C steps.
    self.tstep = 0
def main(): # build and train model model=FCNet() #ConvNet() model.cuda() criterion = torch.nn.NLLLoss() #MSELoss() #reg = (1 - dropout) / (2. * len(train_loader) * tau) optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum) #weight_decay=reg train(model, criterion, optimizer) test(model) # rotation test rotation_list = range(0, 180, 10) for data, _ in test_loader: data = data.cuda() data = Variable(data, volatile=True) for x in data: x.unsqueeze_(0) for r in rotation_list: rotation_matrix = Variable(torch.Tensor([[[math.cos(r/360.0*2*math.pi), -math.sin(r/360.0*2*math.pi), 0], [math.sin(r/360.0*2*math.pi), math.cos(r/360.0*2*math.pi), 0]]]).cuda(), volatile=True) grid = F.affine_grid(rotation_matrix, x.size()) x_rotate = F.grid_sample(x, grid) output_variance, confidence, label = predict(model, x_rotate) print ('rotation degree', str(r).ljust(3), 'Uncertainty : {:.4f} Label : {} Softmax : {:.2f}'.format(output_variance, label, confidence)) '''
def main():
    """Train a policy-gradient agent on Pong-v0.

    The network input is the flattened 80x80 preprocessed frame (6400),
    so the raw observation-space shape is not needed here.
    """
    env = gym.make('Pong-v0')
    action_dim = env.action_space.n
    # 6400 = 80*80 pixels after `prepro` downsampling; 200 hidden units.
    model = FCNet(6400, 200, action_dim)
    agent = PGAgent(env, 'Pong-v0', model, gamma=0.99, pre_processor=prepro)
    render = False
    # 10000 episodes, update every 10 episodes, feed frame differences.
    agent.learn(render, 10000, 10, diff_frame=True)
def main():
    """Train a policy-gradient agent on CartPole-v0."""
    env = gym.make('CartPole-v0')
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    # Small fully-connected policy: state -> 10 hidden units -> actions.
    model = FCNet(state_dim, 10, action_dim)
    batch_size = 50
    max_episodes = 4000
    agent = PGAgent(env, 'CartPole-v0', model, gamma=0.99, resume=False)
    render = False
    # Use the named constants instead of repeating the literals 4000/50.
    agent.learn(render, max_episodes, batch_size, False)
def __init__(self, state_size, action_size, seed, hidden_layers, lr_policy, use_reset, device):
    """Construct the agent's feature network, policy head and optimizer.

    params:
        - state_size (int) : dimension of each state.
        - action_size (int) : dimension of each action.
        - seed (int) : random seed.
        - hidden_layers (list[int]) : sizes of the FCNet hidden layers.
        - lr_policy (float) : learning rate for the policy optimizer.
        - use_reset (bool) : whether FCNet applies its weight-reset init.
        - device : torch device the networks are moved to.
    """
    # NOTE(review): hidden_layers/use_reset used to be silently ignored and
    # hard-coded to [64, 64] / True; they are now forwarded to FCNet.
    self.main_net = FCNet(state_size, seed, hidden_layers=hidden_layers,
                          use_reset=use_reset, act_fnc=F.relu).to(device)
    # Policy head consumes the shared feature network.
    self.policy = Policy(state_size, action_size, seed, self.main_net).to(device)
    self.optimizer = optim.Adam(self.policy.parameters(), lr=lr_policy)
    self.device = device
Y = np.concatenate([Y[-shift:], Y[:-shift]]) # min-max scaling scaler = preprocessing.MinMaxScaler() X = scaler.fit_transform(X) # train set and validation set trainX = X[:50000, :] validX = X[50000:, :] trainY = Y[:50000] validY = Y[50000:] print("Compiling model...") # opt = SGD(lr=0.01, momentum=0.5, nesterov=True) opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999) model = FCNet.build(DIM) model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy']) print(model.summary()) print("Training model...") H = model.fit(trainX, trainY, validation_data=(validX, validY), batch_size=BATCH_SIZE, epochs=EPOCHS) # find min_loss and max_acc i = 0 min_loss = 10 min_loss_epoch = 0 for item in H.history["val_loss"]: i += 1
def run(config):
    """Run one full train/evaluate experiment described by ``config``.

    Builds loaders over the MediaEval18 dataset, instantiates the model named
    by ``config['model']`` ('FC' | 'LSTM' | 'CNN_LSTM'), trains it with the
    optimizer named by ``config['optimizer']`` ('Adam' | 'RMSprop' | 'SGD'),
    then computes metrics and persists config + results.

    raises:
        ValueError: if ``config['model']`` or ``config['optimizer']`` is
        not one of the supported names (previously this surfaced later as
        a confusing NameError).
    """
    # Configure logger
    logger = logging.getLogger()
    logger.setLevel(config['logger_level'])

    # Log the full configuration for reproducibility
    for arg_name, arg in config.items():
        logger.info("initialization -- {} - {}".format(arg_name, arg))

    # Select device (first GPU if available)
    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    logger.info("device - {}".format(str(device)))

    # Dataset initialisation
    trainset = MediaEval18(root='./data', train=True,
                           seq_len=config['seq_len'], shuffle=True,
                           fragment=config['fragment'],
                           features=config['features'],
                           overlapping=config['overlapping'])
    trainloader = DataLoader(trainset, batch_size=config['batch_size'],
                             shuffle=True, num_workers=8)
    logger.info("trainset/loader initialized : trainset length : {}".format(
        len(trainset)))

    testset = MediaEval18(root='./data', train=False,
                          seq_len=config['seq_len'], shuffle=True,
                          fragment=config['fragment'],
                          features=config['features'],
                          overlapping=config['overlapping'])
    testloader = DataLoader(testset, batch_size=config['batch_size'],
                            num_workers=8)
    logger.info("testset/loader initialized : testset length : {}".format(
        len(testset)))

    # Model initialisation — compute the feature dimension once instead of
    # re-reading the first sample in every branch.
    input_size = next(iter(trainset))[0].shape[1]
    if config['model'] == 'FC':
        model = FCNet(input_size=input_size,
                      output_size=2,
                      num_hidden=config['num_hidden'],
                      hidden_size=config.get('hidden_size', -1),
                      dropout=config.get('dropout', 0))
    elif config['model'] == 'LSTM':
        model = RecurrentNet(input_size=input_size,
                             hidden_size=config.get('hidden_size', -1),
                             num_layers=config['num_hidden'],
                             output_size=2,
                             dropout=config.get('dropout', 0),
                             bidirectional=config['bidirect'])
    elif config['model'] == 'CNN_LSTM':
        model = RecurrentNetWithCNN(input_size=input_size,
                                    hidden_size=config.get('hidden_size', -1),
                                    num_layers=config['num_hidden'],
                                    output_size=2,
                                    dropout=config.get('dropout', 0),
                                    bidirectional=config['bidirect'])
    else:
        raise ValueError("unknown model '{}'".format(config['model']))
    model.to(device)
    logger.info("model : {}".format(model))
    logger.info('number of param : {}'.format(
        sum(p.numel() for p in model.parameters())))
    logger.info('number of learnable param : {}'.format(
        sum(p.numel() for p in model.parameters() if p.requires_grad)))

    # Define criterion — regression on (valence, arousal), hence MSE.
    criterion = torch.nn.MSELoss()
    logger.info("criterion : {}".format(criterion))

    # Define optimizer
    attr_optimizer = config['optimizer']
    lr = config['lr']
    weight_decay = config['weight_decay']
    if attr_optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                     weight_decay=weight_decay)
    elif attr_optimizer == 'RMSprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr,
                                        weight_decay=weight_decay)
    elif attr_optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                                    weight_decay=weight_decay, momentum=0.9)
    else:
        raise ValueError("unknown optimizer '{}'".format(attr_optimizer))
    logger.info("optimizer : {}".format(optimizer))

    # Train model
    train_losses, test_losses = train_model(model=model,
                                            trainloader=trainloader,
                                            testloader=testloader,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            device=device,
                                            grad_clip=config['grad_clip'],
                                            nb_epoch=config['nb_epoch'])
    logger.info("training done")

    metrics = get_metrics(model, testloader)
    save_config_and_results(config, train_losses, test_losses, metrics)
# NOTE(review): fragment — the list literal closed by the ']' below (the
# split lengths) starts before this chunk, and the training loop at the end
# continues past it. validation_split, batch_size, lr, n_epochs are defined
# earlier in the file.
    round(validation_split * len(dataset['train']))
]
dataset['train'], dataset['val'] = random_split(dataset['train'],
                                                lengths=split_len)
# One DataLoader per split ('train'/'val'/'test').
loader = {
    k: DataLoader(v, batch_size, shuffle=True, num_workers=1)
    for k, v in dataset.items()
}

### MODEL ###
from torch import nn, optim
from model import FCNet

# Input width = feature count of one sample — assumes .data rows are
# fixed-length feature vectors; TODO confirm against NextWaveDataset.
net = FCNet(len(dataset['test'].data[0]))
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=lr, betas=(0.5, 0.999))

### TRAINING ###
best_auc = 0
for epoch in range(n_epochs):
    net.train()
    running_loss, S = 0, 0
    for i, (traj_id, data, label) in enumerate(loader['train']):
        optimizer.zero_grad()
        out = net(data)
        # Flatten to match label shape; BCELoss expects float targets.
        loss = criterion(out.view(-1), label.float())
class DQNAgent:
    """A DQN Agent which interacts with and learns from the environment.

    Maintains an online Q-network and a target Q-network; experiences are
    stored in a replay buffer and a learning step runs every C environment
    steps. Hyperparameters (LR, C, GAMMA, TAU, BUFFER_SIZE, BATCH_SIZE,
    device) come from module-level constants defined elsewhere in the file.
    """

    def __init__(self, state_size, action_size, seed):
        """ Initializes a DQN Agent.
        params:
            - state_size (int) : dimension of each state.
            - action_size (int) : dimension of each action.
            - seed (int) : random seed.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        # initialize the Q network (online network, updated by the optimizer)
        self.qnet = FCNet(self.state_size, self.action_size, seed).to(device)
        # initialize the target Q network (same architecture; updated only
        # via soft_update, never optimized directly)
        self.target_qnet = FCNet(self.state_size, self.action_size, seed).to(device)
        # create optimizer over the online network's parameters
        self.optimizer = optim.Adam(self.qnet.parameters(), lr=LR)
        # create replay buffer
        self.buffer = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # initialize timestep for updates using C
        self.tstep = 0

    def step(self, state, action, reward, next_state, done):
        """Record one transition and learn every C steps (once the buffer
        holds more than BATCH_SIZE experiences)."""
        # save experiences in replay buffer
        self.buffer.push(state, action, reward, next_state, done)
        # Learn every C timesteps
        self.tstep = (self.tstep + 1) % C
        if self.tstep == 0:
            # check if enough samples are available in buffer
            if len(self.buffer) > BATCH_SIZE:
                experiences = self.buffer.sample()
                self.learn(experiences, GAMMA)

    def learn(self, experiences, gamma):
        """ Updates value params using batch of experience tuples.
        params:
            - experiences (Tuple[torch.Variable]) : (s, a, r, s', done) tuple.
            - gamma (float) : discount factor.
        """
        # unpack experiences
        s, a, r, ns, d = experiences
        # get expected q vals from qnet (Q(s, a) for the taken actions)
        q_exp = self.qnet(s).gather(1, a)
        # get max Q vals for next state from target_qnet; detach() blocks
        # gradients through the target network
        q_next = self.target_qnet(ns).detach().max(1)[0].unsqueeze(1)
        # compute Q vals for current state — (1 - d) zeroes the bootstrap
        # term on terminal transitions
        q_current = r + (gamma * q_next * (1 - d))
        # compute loss
        loss = F.smooth_l1_loss(q_exp, q_current)  # huber loss
        # loss = F.mse_loss(q_exp, q_current)
        # minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        ######################## Update Target Network ########################
        self.soft_update(TAU)

    def soft_update(self, tau):
        """ Performs a soft update for the parameters.
        theta_target = tau * theta_local + (1 - tau) * theta_target
        params:
            - tau (float) : interpolation parameter.
        """
        for target_param, local_param in zip(self.target_qnet.parameters(),
                                             self.qnet.parameters()):
            target_param.data.copy_(tau * local_param.data
                                    + (1 - tau) * target_param.data)

    def act(self, state, eps=0.):
        """ Returns actions for a given state as per current policy
        (epsilon-greedy).
        params:
            - state (array like) : current state.
            - eps (float) : epsilon for eps-greedy action selection.
        """
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        # set to eval mode so dropout/batch-norm (if any) are disabled
        self.qnet.eval()
        with torch.no_grad():
            # get action values
            act_vals = self.qnet(state)
        # turn back to train mode
        self.qnet.train()
        # epsilon greedy action selection
        if random.random() > eps:
            return np.argmax(act_vals.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))
# NOTE(review): fragment — batch_size and lr are defined earlier in the file,
# and the final loop body continues past this chunk.
from torch.utils.data import DataLoader, random_split
from dataset import NextWaveDataset

# Train/test CSVs wrapped by the project dataset class.
dataset = {
    'train': NextWaveDataset('../results/train_clean.csv'),
    'test': NextWaveDataset('../results/test_clean.csv')
}
loader = {k: DataLoader(v, batch_size, shuffle=True, num_workers=1)
          for k, v in dataset.items()}

### MODEL ###
from torch import nn, optim
from model import FCNet

# Input width = feature count of one sample — assumes .data rows are
# fixed-length feature vectors; TODO confirm against NextWaveDataset.
net = FCNet(len(dataset['test'].data[0]))
# Restore the checkpoint saved at epoch 16 (AUC 0.981 on validation).
net.load_state_dict(
    torch.load('../checkpoint/epp16AUC0.981.pt')
)
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=lr, betas=(0.5, 0.999))

### PREDICT ###
net.eval()
predictions = []
for i, (traj_id, data, label) in enumerate(loader['test']):
    out = net(data)
    # Pair each trajectory id with its predicted score.
    for t, p in zip(traj_id, out):
def getFCNet_Config():
    """Build a fully-connected network together with its run configuration.

    Returns:
        tuple: (FCNet instance, Config instance).
    """
    # Config(20, 0.1, False) — argument meanings not visible here;
    # presumably epochs / learning rate / a boolean flag — TODO confirm.
    configuration = Config(20, 0.1, False)
    network = FCNet.FCNet()
    return network, configuration