Example #2
def main():

	# build and train the model
	model = FCNet()  # ConvNet()
	model.cuda()
	criterion = torch.nn.NLLLoss()  # MSELoss()
	# reg = (1 - dropout) / (2. * len(train_loader) * tau)
	optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)  # weight_decay=reg
	train(model, criterion, optimizer)

	test(model)

	# rotation test: rotate each test image in 10-degree steps and report
	# the predictive uncertainty at every angle
	rotation_list = range(0, 180, 10)
	with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True)
		for data, _ in test_loader:
			data = data.cuda()
			for x in data:
				x.unsqueeze_(0)
				for r in rotation_list:
					rad = math.radians(r)
					rotation_matrix = torch.Tensor(
						[[[math.cos(rad), -math.sin(rad), 0],
						  [math.sin(rad), math.cos(rad), 0]]]).cuda()
					grid = F.affine_grid(rotation_matrix, x.size())
					x_rotate = F.grid_sample(x, grid)
					output_variance, confidence, label = predict(model, x_rotate)
					print('rotation degree', str(r).ljust(3),
						  'Uncertainty : {:.4f} Label : {} Softmax : {:.2f}'.format(
							  output_variance, label, confidence))
Example #3
def main():
    env = gym.make('Pong-v0')
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n

    # note: state_dim is unused here; 6400 is the size of the flattened
    # frame returned by prepro (assumed to be an 80x80 image)
    model = FCNet(6400, 200, action_dim)
    agent = PGAgent(env, 'Pong-v0', model, gamma=0.99, pre_processor=prepro)

    render = False
    agent.learn(render, 10000, 10, diff_frame=True)
Example #4
def main():
    env = gym.make('CartPole-v0')
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n

    model = FCNet(state_dim, 10, action_dim)
    batch_size = 50
    max_episodes = 4000
    agent = PGAgent(env, 'CartPole-v0', model, gamma=0.99, resume=False)

    render = False
    agent.learn(render, max_episodes, batch_size, False)
Example #5
    def __init__(self, state_size, action_size, seed, hidden_layers, lr_policy,
                 use_reset, device):

        #self.main_net = ConvNet(state_size, feature_dim, seed, use_reset, input_channel).to(device)
        self.main_net = FCNet(state_size,
                              seed,
                              hidden_layers=hidden_layers,
                              use_reset=use_reset,
                              act_fnc=F.relu).to(device)
        self.policy = Policy(state_size, action_size, seed,
                             self.main_net).to(device)
        self.optimizer = optim.Adam(self.policy.parameters(), lr=lr_policy)
        self.device = device
Example #6
# circularly shift the labels by `shift` positions (equivalent to np.roll)
Y = np.concatenate([Y[-shift:], Y[:-shift]])

# min-max scaling
scaler = preprocessing.MinMaxScaler()
X = scaler.fit_transform(X)

# train set and validation set
trainX = X[:50000, :]
validX = X[50000:, :]
trainY = Y[:50000]
validY = Y[50000:]

print("Compiling model...")
# opt = SGD(lr=0.01, momentum=0.5, nesterov=True)
opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
model = FCNet.build(DIM)
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy'])
model.summary()
print("Training model...")
H = model.fit(trainX,
              trainY,
              validation_data=(validX, validY),
              batch_size=BATCH_SIZE,
              epochs=EPOCHS)

# find the epoch with the lowest validation loss
min_loss = float("inf")
min_loss_epoch = 0
for i, item in enumerate(H.history["val_loss"], start=1):
    if item < min_loss:
        min_loss = item
        min_loss_epoch = i
Example #7
def run(config):
    # Configure logger
    logger = logging.getLogger()
    logger.setLevel(config['logger_level'])

    # Log config
    for arg_name, arg in config.items():
        logger.info("initialization -- {} - {}".format(arg_name, arg))

    # Select device
    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    logger.info("device - {}".format(str(device)))

    # Dataset initialisation
    trainset = MediaEval18(root='./data',
                           train=True,
                           seq_len=config['seq_len'],
                           shuffle=True,
                           fragment=config['fragment'],
                           features=config['features'],
                           overlapping=config['overlapping'])
    trainloader = DataLoader(trainset,
                             batch_size=config['batch_size'],
                             shuffle=True,
                             num_workers=8)
    logger.info("trainset/loader initialized : trainset lenght : {}".format(
        len(trainset)))

    testset = MediaEval18(root='./data',
                          train=False,
                          seq_len=config['seq_len'],
                          shuffle=True,
                          fragment=config['fragment'],
                          features=config['features'],
                          overlapping=config['overlapping'])
    testloader = DataLoader(testset,
                            batch_size=config['batch_size'],
                            num_workers=8)
    logger.info("testset/loader initialized : testset lenght : {}".format(
        len(testset)))

    # Model initialisation
    if config['model'] == 'FC':
        model = FCNet(input_size=next(iter(trainset))[0].shape[1],
                      output_size=2,
                      num_hidden=config['num_hidden'],
                      hidden_size=config.get('hidden_size', -1),
                      dropout=config.get('dropout', 0))
    elif config['model'] == 'LSTM':
        model = RecurrentNet(input_size=next(iter(trainset))[0].shape[1],
                             hidden_size=config.get('hidden_size', -1),
                             num_layers=config['num_hidden'],
                             output_size=2,
                             dropout=config.get('dropout', 0),
                             bidirectional=config['bidirect'])
    elif config['model'] == 'CNN_LSTM':
        model = RecurrentNetWithCNN(input_size=next(iter(trainset))[0].shape[1],
                                    hidden_size=config.get('hidden_size', -1),
                                    num_layers=config['num_hidden'],
                                    output_size=2,
                                    dropout=config.get('dropout', 0),
                                    bidirectional=config['bidirect'])
    else:
        raise ValueError("unknown model: {}".format(config['model']))
    model.to(device)
    logger.info("model : {}".format(model))
    logger.info('number of param : {}'.format(
        sum(p.numel() for p in model.parameters())))
    logger.info('number of learnable param : {}'.format(
        sum(p.numel() for p in model.parameters() if p.requires_grad)))

    # Define criterion
    criterion = torch.nn.MSELoss()
    logger.info("criterion : {}".format(criterion))

    # Define optimizer
    attr_optimizer = config['optimizer']
    lr = config['lr']
    weight_decay = config['weight_decay']
    if attr_optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=lr,
                                     weight_decay=weight_decay)
    elif attr_optimizer == 'RMSprop':
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=lr,
                                        weight_decay=weight_decay)
    elif attr_optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    weight_decay=weight_decay,
                                    momentum=0.9)
    else:
        raise ValueError("unknown optimizer: {}".format(attr_optimizer))
    logger.info("optimizer : {}".format(optimizer))

    # Train model
    train_losses, test_losses = train_model(model=model,
                                            trainloader=trainloader,
                                            testloader=testloader,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            device=device,
                                            grad_clip=config['grad_clip'],
                                            nb_epoch=config['nb_epoch'])
    logger.info("training done")

    metrics = get_metrics(model, testloader)

    save_config_and_results(config, train_losses, test_losses, metrics)
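
For reference, here is a minimal sketch of a config dict covering every key that run() reads above. All values are placeholder assumptions, not defaults from the source.

config = {
    'logger_level': 'INFO',
    'seq_len': 100,            # window length passed to MediaEval18
    'fragment': 1,             # dataset fragment selector
    'features': ['all'],       # feature set passed to the dataset
    'overlapping': False,
    'batch_size': 32,
    'model': 'FC',             # one of 'FC', 'LSTM', 'CNN_LSTM'
    'num_hidden': 2,
    'hidden_size': 128,
    'dropout': 0.5,
    'bidirect': False,         # only read by the recurrent models
    'optimizer': 'Adam',       # one of 'Adam', 'RMSprop', 'SGD'
    'lr': 1e-3,
    'weight_decay': 0.0,
    'grad_clip': 1.0,
    'nb_epoch': 50,
}
run(config)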
Example #8
# hold out a validation fraction from the training set
split_len = [
    len(dataset['train']) - round(validation_split * len(dataset['train'])),
    round(validation_split * len(dataset['train']))
]
dataset['train'], dataset['val'] = random_split(dataset['train'],
                                                lengths=split_len)

loader = {
    k: DataLoader(v, batch_size, shuffle=True, num_workers=1)
    for k, v in dataset.items()
}

### MODEL ###

from torch import nn, optim
from model import FCNet

net = FCNet(len(dataset['test'].data[0]))
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=lr, betas=(0.5, 0.999))

### TRAINING ###

best_auc = 0
for epoch in range(n_epochs):
    net.train()
    running_loss, S = 0, 0
    for i, (traj_id, data, label) in enumerate(loader['train']):
        optimizer.zero_grad()

        out = net(data)
        loss = criterion(out.view(-1), label.float())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        S += label.size(0)
Example #9
class DQNAgent:
    """ A DQN Agent which interacts and learns from the environment. """
    def __init__(self, state_size, action_size, seed):
        """
        
        Initializes a DQN Agent.

        params:
            - state_size (int)  : dimension of each state.
            - action_size (int) : dimension of each action.
            - seed (int)        : random seed.

        """

        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)

        # initialize the Q network
        self.qnet = FCNet(self.state_size, self.action_size, seed).to(device)
        # initialize the target Q network
        self.target_qnet = FCNet(self.state_size, self.action_size,
                                 seed).to(device)

        # create optimizer
        self.optimizer = optim.Adam(self.qnet.parameters(), lr=LR)

        # create replay buffer
        self.buffer = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

        # initialize timestep for updates using C
        self.tstep = 0

    def step(self, state, action, reward, next_state, done):
        # save experiences in replay buffer
        self.buffer.push(state, action, reward, next_state, done)

        # Learn every C timesteps
        self.tstep = (self.tstep + 1) % C

        if self.tstep == 0:

            # check if enough samples are available in buffer
            if len(self.buffer) > BATCH_SIZE:
                experiences = self.buffer.sample()
                self.learn(experiences, GAMMA)

    def learn(self, experiences, gamma):
        """
        Updates value params using batch of experience tuples.

        params:
            - experiences (Tuple[torch.Variable]) : (s, a, r, s', done) tuple.
            - gamma (float)                       : discount factor.
        """

        # unpack experiences
        s, a, r, ns, d = experiences

        # get expected q vals from qnet
        q_exp = self.qnet(s).gather(1, a)

        # get max Q vals for next state from target_qnet
        q_next = self.target_qnet(ns).detach().max(1)[0].unsqueeze(1)

        # compute Q vals for current state
        q_current = r + (gamma * q_next * (1 - d))

        # compute loss
        loss = F.smooth_l1_loss(q_exp, q_current)  # huber loss
        # loss = F.mse_loss(q_exp, q_current)

        # minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        ######################## Update Target Network ########################
        self.soft_update(TAU)

    def soft_update(self, tau):
        """
        Performs a soft update of the target network parameters:
        theta_target = tau * theta_local + (1 - tau) * theta_target

        params:
            - tau (float) : interpolation parameter.
        """

        for target_param, local_param in zip(self.target_qnet.parameters(),
                                             self.qnet.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1 - tau) * target_param.data)

    def act(self, state, eps=0.):
        """ 
        Returns actions for a given state as per current policy.

        params:
            - state (array like) : current state.
            - eps (float)        : epsilon for eps-greedy action selection.
        """

        state = torch.from_numpy(state).float().unsqueeze(0).to(device)

        # set to eval mode
        self.qnet.eval()

        with torch.no_grad():
            # get action values
            act_vals = self.qnet(state)

        # turn back to train mode
        self.qnet.train()

        # epsilon greedy action selection
        if random.random() > eps:
            return np.argmax(act_vals.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))
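
A minimal sketch of a driver loop for the agent above, assuming a Gym-style env whose state and action sizes match the networks; the epsilon schedule values are illustrative assumptions, not from the source.

def run_episodes(env, agent, n_episodes=1000, max_t=1000,
                 eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    eps = eps_start
    for _ in range(n_episodes):
        state = env.reset()
        for _ in range(max_t):
            action = agent.act(state, eps)                       # eps-greedy action
            next_state, reward, done, _ = env.step(action)
            agent.step(state, action, reward, next_state, done)  # store + maybe learn
            state = next_state
            if done:
                break
        eps = max(eps_end, eps_decay * eps)                      # decay exploration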
Example #10
from torch.utils.data import DataLoader, random_split
from dataset import NextWaveDataset

dataset = {
    'train': NextWaveDataset('../results/train_clean.csv'),
    'test': NextWaveDataset('../results/test_clean.csv')
}

loader = {k: DataLoader(v, batch_size, shuffle=True, num_workers=1) for k, v in dataset.items()}

### MODEL ###

from torch import nn, optim
from model import FCNet

net = FCNet(len(dataset['test'].data[0]))

net.load_state_dict(
    torch.load('../checkpoint/epp16AUC0.981.pt')
)

criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=lr, betas=(0.5, 0.999))

### PREDICT ###

net.eval()
predictions = []
with torch.no_grad():
    for i, (traj_id, data, label) in enumerate(loader['test']):
        out = net(data)
        for t, p in zip(traj_id, out):
            predictions.append((t.item(), p.item()))
Example #11
def getFCNet_Config():
    cf = Config(20, 0.1, False)
    net = FCNet.FCNet()
    return net, cf