Code example #1
    def prepare_batch(self, target_network, q_network):

        batch_size = min(self.length, self.args.batch_size)

        sample = self.memory.sample(batch_size)

        s = t.tensor(sample['obs'])
        a = t.tensor(sample['act'])
        r = t.tensor(sample['rew'])
        ns = t.tensor(sample['next_obs'])
        term = t.tensor(sample['terminal'])

        states = s.permute(0, 3, 1, 2).to(Device.get_device())
        actions = a.type(t.int64).to(Device.get_device())
        rewards = r.to(Device.get_device())
        next_states = ns.permute(0, 3, 1, 2).to(Device.get_device())
        terminals = term.to(Device.get_device())

        indexes = sample["indexes"]

        with t.no_grad():

            # TD target: bootstrap from the target network's best next-state
            # action value. Note that `terminals` multiplies the bootstrap
            # term directly, i.e. it acts as a continuation mask here.
            target = rewards + terminals * self.args.gamma * target_network(
                next_states).max(dim=1, keepdim=True)[0]
            predicted = q_network(states).gather(1, actions)

        # New priorities are the per-sample TD errors, with a floor of 1.
        new_priorities = f.smooth_l1_loss(predicted, target,
                                          reduction='none').cpu().numpy()
        new_priorities[new_priorities < 1] = 1

        self.memory.update_priorities(indexes, new_priorities)

        return states, actions, rewards, next_states, terminals
Code example #2
File: stadyna.py Project: tempbottle/StaDynA
def getDeviceForDynAnalysis():
    dev_list = Device.get_devices_list()
    
    devNum = len(dev_list)
    
    if devNum <= 0:
        logger.error("No device has been detected! Connect your device and restart the application!")
        return None
    
    if devNum == 1:
        return Device.get_device(dev_list[0])
    
    choice = None
    if devNum > 1:
        print "Select the device to use for analysis:\n"
        for i in xrange(0, devNum):
            print "%d. %s\n" % ((i + 1), dev_list[i])
        
        while not choice:
            try:
                choice = int(raw_input())
                if choice not in range(1, devNum+1):
                    choice = None
                    print 'Invalid choice! Choose right number!'
            except ValueError:
                print 'Invalid Number! Choose right number!'

    return Device.get_device(dev_list[choice - 1])
Code example #3
def getDeviceForDynAnalysis():
    dev_list = Device.get_devices_list()

    devNum = len(dev_list)

    if devNum <= 0:
        logger.error(
            "No device has been detected! Connect your device and restart the application!"
        )
        return None

    if devNum == 1:
        return Device.get_device(dev_list[0])

    choice = None
    if devNum > 1:
        print "Select the device to use for analysis:\n"
        for i in xrange(0, devNum):
            print "%d. %s\n" % ((i + 1), dev_list[i])

        while not choice:
            try:
                choice = int(raw_input())
                if choice not in range(1, devNum + 1):
                    choice = None
                    print 'Invalid choice! Choose right number!'
            except ValueError:
                print 'Invalid Number! Choose right number!'

    return Device.get_device(dev_list[choice - 1])
Code example #4
File: prepare_input.py Project: dixantmittal/mctsnet
def prepare_input_for_f_backup(node, action, reward):
    memory = node.tensors.memory
    child_memory = node.variables.children[action].tensors.memory

    action = to_one_hot(action, SIMULATOR.n_actions).to(Device.get_device())
    reward = t.tensor([reward]).float().to(Device.get_device())

    return memory, child_memory, action, reward
Code example #5
File: performer.py Project: dixantmittal/async-dqn
def performer(idx, model, SIMULATOR):
    # allocate a device
    n_gpu = t.cuda.device_count()
    if n_gpu > 0:
        Device.set_device(idx % n_gpu)

    q_network = deepcopy(model)
    q_network.to(Device.get_device())
    q_network.eval()

    simulator = SIMULATOR()

    state = simulator.reset()
    episode_reward = 0

    terminal = False
    while not terminal:
        action = q_network(as_tensor(state)).argmax().item()

        next_state, reward, terminal = simulator.step(action)

        episode_reward += reward
        state = next_state

    return episode_reward
Code example #6
File: mctsnet.py Project: Hung86/CS5446_AI_planning
    def state_to_tensor(self, state):
        key = str(state)
        tensor = self.tensor_cache.get(key)
        if tensor is None:
            tensor = SIMULATOR.state_to_tensor(state).to(Device.get_device())
            self.tensor_cache[key] = tensor

        return tensor
Code example #7
def calculate_loss(training_data, action, args):
    # find the predictions, embeddings and sampled actions
    predictions, logits, actions = training_data

    # duplicate action len(predictions) times to get loss after each simulation
    action = t.tensor([action] * len(predictions)).long().to(
        Device.get_device())

    predictions = t.stack(predictions)

    # Compute cross entropy loss to train differentiable parts
    loss = f.cross_entropy(predictions[-1].unsqueeze(0),
                           action[-1].unsqueeze(0))

    loss += args.beta * t.sum(
        t.softmax(predictions[-1], dim=0) *
        t.log_softmax(predictions[-1], dim=0))

    # Compute decrease in loss after each simulation
    l_m = f.cross_entropy(predictions, action,
                          reduction='none').clone().detach()
    r_m = l_m[:-1] - l_m[1:]

    # compute geometric sum for difference in loss
    for i in reversed(range(0, len(r_m) - 1)):
        r_m[i] = r_m[i] + args.gamma * r_m[i + 1]

    # calculate loss for tree search actions
    for l_m, logits_m, action_m in zip(r_m, logits, actions):
        action_m = t.tensor(action_m).long().to(Device.get_device())

        # find logits
        logits_m = t.stack(logits_m)

        # find negative likelihood to minimise
        negative_log_likelihood = f.cross_entropy(
            logits_m, action_m, reduction='sum') * l_m

        # add it to loss
        loss += negative_log_likelihood

    return loss
Code example #8
def collector(idx, shared_model, shared_dataset, hyperparameters, lock):
    try:
        writer = SummaryWriter('runs/{}/collector:{:02}'.format(
            datetime.now().strftime("%d|%m_%H|%M"), idx))
        logging.basicConfig(filename='logs/collector:{:02}.log'.format(idx),
                            filemode='w',
                            format='%(message)s',
                            level=logging.DEBUG)

        # allocate a device
        n_gpu = t.cuda.device_count()
        if n_gpu > 0:
            Device.set_device(idx % n_gpu)

        local_model = deepcopy(shared_model)
        local_model.to(Device.get_device())
        local_model.eval()

        simulator = SIMULATOR()

        for itr in tqdm(count(),
                        position=idx,
                        desc='collector:{:02}'.format(idx)):
            local_model.load_state_dict(shared_model.state_dict())

            state = simulator.reset()

            episode_reward = 0
            for i in range(50):
                # Find the expert action for input belief
                expert_action, _ = expert(state, hyperparameters)

                lock.acquire()
                shared_dataset.append((state, expert_action))
                lock.release()

                # Simulate the learner's action
                action, _ = local_model.search(state, hyperparameters)
                state, reward, terminal = simulator.step(action)
                episode_reward += reward

                if terminal:
                    break

            logging.debug('Episode reward: {:.2f}'.format(episode_reward))
            writer.add_scalar('episode_reward', episode_reward, itr)
            writer.close()

    except KeyboardInterrupt:
        print('exiting collector:{:02}'.format(idx))
Code example #9
    def perform(self, args):

        # allocate a device
        n_gpu = t.cuda.device_count()
        if n_gpu > 0:
            Device.set_device(1)

        q_network = deepcopy(self.model)
        q_network.to(Device.get_device())
        q_network.eval()

        num_reached = 0

        for n in range(args.n_tests):

            state = self.simulator.reset()
            state_processed = np.concatenate((state.front_rgb, state.wrist_rgb), axis=2)

            episode_reward = 0
            terminal = False

            for i in range(800):

                if np.random.RandomState().rand() < 0.1:
                    action = np.random.RandomState().randint(self.simulator.n_actions())
                else:
                    action = q_network(as_tensor(state_processed)).argmax().item()

                next_state, reward, terminal = self.simulator.step(action, state)

                episode_reward += reward

                state_processed = np.concatenate((next_state.front_rgb, next_state.wrist_rgb), axis=2)
                state = next_state

                if terminal:
                    print("\nTrial {} reached the goal!".format(n + 1))
                    num_reached += 1
                    break

            print("\nEpisode reward: {}".format(episode_reward))

        print("\n\nSuccess rate: {}/{}".format(num_reached, args.n_tests))
Code example #10
def worker(idx, solver, args):
    n_gpu = t.cuda.device_count()
    if n_gpu > 0:
        Device.set_device(idx % n_gpu)

    solver.to(Device.get_device())

    rewards = []
    with t.no_grad():
        for _ in tqdm(range(args.n_samples),
                      position=idx,
                      desc='worker_{:02}'.format(idx),
                      file=sys.stdout):
            rewards.append(performer(solver, args))

    return rewards
Code example #11
def main():
    dev = Device.get_device("303195BA0D4D00EC")

    messages = Queue.Queue()
    seccon_producer = SecconMessageProducer(messages, dev)
    seccon_consumer = SecconMessageProcessor(messages)
    seccon_producer.setDaemon(False)
    seccon_consumer.setDaemon(False)
    seccon_producer.start()
    seccon_consumer.start()

    time.sleep(60)
    print "Time is finished!!!"
    seccon_producer.stopThread()
    seccon_consumer.stopThread()
    seccon_producer.join()
    seccon_consumer.join()
Code example #12
File: messages.py Project: tempbottle/StaDynA
def main():         
    dev = Device.get_device("303195BA0D4D00EC")
    
    messages = Queue.Queue()
    seccon_producer = SecconMessageProducer(messages, dev)
    seccon_consumer = SecconMessageProcessor(messages)
    seccon_producer.setDaemon(False)
    seccon_consumer.setDaemon(False)
    seccon_producer.start()
    seccon_consumer.start()
    
    time.sleep(60)
    print "Time is finished!!!"
    seccon_producer.stopThread()
    seccon_consumer.stopThread()
    seccon_producer.join()
    seccon_consumer.join()
Code example #13
File: optimiser.py Project: dixantmittal/mctsnet
def optimiser(idx, shared_model, shared_dataset, hyperparameters, lock):
    try:
        writer = SummaryWriter('runs/{}/optimiser:{:02}'.format(datetime.now().strftime("%d|%m_%H|%M"), idx))
        logging.basicConfig(filename='logs/optimiser:{:02}.log'.format(idx),
                            filemode='w',
                            format='%(message)s',
                            level=logging.DEBUG)

        optimiser = t.optim.SGD(params=shared_model.parameters(), lr=hyperparameters.lr)

        # allocate a device
        n_gpu = t.cuda.device_count()
        if n_gpu > 0:
            Device.set_device(idx % n_gpu)

        local_model = deepcopy(shared_model)
        local_model.to(Device.get_device())
        local_model.train()

        for itr in tqdm(count(), position=idx, desc='optimiser:{:02}'.format(idx)):
            # Sync local model with shared model
            if itr % hyperparameters.sync_frequency == 0:
                local_model.load_state_dict(shared_model.state_dict())

            # Sample a data point from dataset
            state, expert_action = choice(shared_dataset)

            # Find the predicted action
            action, training_info = local_model.search(state, hyperparameters)

            # Optimise for the sample
            loss = calculate_loss(training_info, expert_action, hyperparameters)

            optimise_model(shared_model, local_model, loss, optimiser, lock)

            # Log the results
            logging.debug('Sample loss: {:.2f}'.format(loss.item()))
            writer.add_scalar('loss/sample_loss', loss.item(), itr)
            writer.close()

    except KeyboardInterrupt:
        print('exiting optimiser:{:02}'.format(idx))
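Examples #13, #14 and #17 delegate the actual parameter update to an `optimise_model` helper that is not reproduced on this page (example #13 passes `(shared_model, local_model, loss, optimiser, lock)`, while #14 and #17 also pass `args`). As a rough, hypothetical sketch only, the usual pattern behind such a helper in shared-model training is to backpropagate on the local copy, copy the gradients onto the shared model under the lock, and step the optimiser that was built over the shared parameters:

def optimise_model(shared_model, local_model, loss, optimiser, lock):
    # Hypothetical sketch, not the projects' actual implementation.
    # Clear stale gradients on both copies.
    local_model.zero_grad()
    optimiser.zero_grad()

    # Backpropagate through the local copy of the model.
    loss.backward()

    # Copy the gradients onto the shared model and update it while holding
    # the lock, so concurrent optimiser processes do not interleave updates.
    with lock:
        for shared_param, local_param in zip(shared_model.parameters(),
                                             local_model.parameters()):
            if local_param.grad is not None:
                shared_param.grad = local_param.grad.detach().to(shared_param.device)
        optimiser.step()

    return loss.item()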
Code example #14
File: optimiser.py Project: dixantmittal/async-dqn
def optimiser(idx, shared_model, SIMULATOR, args, lock):
    try:
        writer = SummaryWriter('runs/{}/optimiser:{:02}'.format(datetime.now().strftime("%d|%m_%H|%M"), idx))
        logging.basicConfig(filename='logs/optimiser:{:02}.log'.format(idx),
                            filemode='w',
                            format='%(message)s',
                            level=logging.DEBUG)

        sgd = t.optim.SGD(params=shared_model.parameters(), lr=args.lr)

        # allocate a device
        n_gpu = t.cuda.device_count()
        if n_gpu > 0:
            Device.set_device(idx % n_gpu)

        q_network = deepcopy(shared_model)
        q_network.to(Device.get_device())
        q_network.train()

        target_network = deepcopy(q_network)
        target_network.to(Device.get_device())
        target_network.eval()

        buffer = deque(maxlen=args.buffer_size)

        simulator = SIMULATOR()
        for itr in tqdm(count(), position=idx, desc='optimiser:{:02}'.format(idx)):

            state = simulator.reset()
            episode_reward = 0
            for e in count():
                if np.random.RandomState().rand() < max(args.eps ** itr, args.min_eps):
                    action = np.random.RandomState().randint(simulator.n_actions())
                else:
                    action = q_network(as_tensor(state)).argmax().item()

                next_state, reward, terminal = simulator.step(action)

                buffer.append(transition_to_tensor(state, action, reward, next_state, terminal))

                episode_reward += reward
                state = next_state

                # Sample a data point from dataset
                batch = prepare_batch(buffer, args.batch_size)

                # Sync local model with shared model
                q_network.load_state_dict(shared_model.state_dict())

                # Calculate loss for the batch
                loss = calculate_loss(q_network, target_network, batch, args)

                # Optimise for the batch
                loss = optimise_model(shared_model, q_network, loss, sgd, args, lock)

                # Log the results
                logging.debug('Batch loss: {:.2f}'.format(loss))
                writer.add_scalar('batch/loss', loss, e)

                if terminal:
                    break

            logging.debug('Episode reward: {:.2f}'.format(episode_reward))
            writer.add_scalar('episode_reward', episode_reward, itr)
            writer.close()

            if itr % args.target_update_frequency == 0:
                target_network.load_state_dict(q_network.state_dict())

    except KeyboardInterrupt:
        print('exiting optimiser:{:02}'.format(idx))
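The `calculate_loss` used in example #14 (and, with an explicit device argument, in example #17) is the DQN variant, not the MCTSnet loss shown in example #7, and its body is not included on this page. Assuming the batch unpacks to `(states, actions, rewards, next_states, terminals)` as returned by `prepare_batch` in example #1, a minimal sketch of such a temporal-difference loss could look like the following; it is an illustration under those assumptions, not the projects' actual code:

import torch as t
import torch.nn.functional as f


def calculate_loss(q_network, target_network, batch, args):
    # Hypothetical sketch of a DQN TD loss; the real helper may differ.
    states, actions, rewards, next_states, terminals = batch

    # Q-values of the actions that were actually taken.
    predicted = q_network(states).gather(1, actions)

    with t.no_grad():
        # Bootstrap from the target network's best next-state action value.
        # As in example #1, `terminals` multiplies the bootstrap term
        # directly, i.e. it is treated as a continuation mask.
        next_q = target_network(next_states).max(dim=1, keepdim=True)[0]
        target = rewards + terminals * args.gamma * next_q

    return f.smooth_l1_loss(predicted, target)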
Code example #15
File: utils.py Project: dixantmittal/async-dqn
def as_tensor(x, dtype=t.float32):
    return t.tensor(x, dtype=dtype, device=Device.get_device())
Code example #16
solvers = {'mctsnet': MCTSnet(), 'mcts': MCTS()}

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--solver',
                        dest='solver',
                        default='mctsnet',
                        help='Solver to use')
    parser.add_argument('--load_model',
                        dest='load_model',
                        default='models/checkpoint.model',
                        help='Path to load model file')
    parser.add_argument('--n_simulations',
                        dest='n_simulations',
                        default=10,
                        type=int,
                        help='Number of tree simulations')
    args = parser.parse_args()

    args.training = False

    if t.cuda.is_available():
        Device.set_device(0)

    model = solvers[args.solver]
    model.load(args.load_model)
    model.to(Device.get_device())

    with t.no_grad():
        print('Episode reward:', performer(model, args, render=True))
Code example #17
def optimise(idx, shared_model, queues, args, lock):
    try:

        writer = SummaryWriter('runs/o{}'.format(idx))

        logging.basicConfig(filename='logs/optimiser:{:02}.log'.format(idx),
                            filemode='w',
                            format='%(message)s',
                            level=logging.DEBUG)

        sgd = t.optim.Adam(params=shared_model.parameters(), lr=args.lr)

        # allocate a device
        n_gpu = t.cuda.device_count()
        if n_gpu > 0:
            Device.set_device(0)

        q_network = deepcopy(shared_model)
        q_network.to(Device.get_device())
        q_network.train()

        target_network = deepcopy(q_network)
        target_network.to(Device.get_device())
        target_network.eval()

        buffer = ReplayBuffer(args)

        for itr in tqdm(count(),
                        position=idx,
                        desc='optimiser:{:02}'.format(idx)):

            buffer.load_queues(queues, q_network, target_network, lock, args)

            while (len(buffer) < min(
                    args.n_workers * args.episode_length * args.warmup,
                    args.buffer_size / 2)):
                buffer.load_queues(queues, q_network, target_network, lock,
                                   args)
                continue

            # Sample a data point from dataset
            batch = buffer.prepare_batch(target_network, q_network)

            # Sync local model with shared model
            q_network.load_state_dict(shared_model.state_dict())

            # Calculate loss for the batch
            loss = calculate_loss(q_network, target_network, batch, args,
                                  Device.get_device())

            # Optimise for the batch
            loss = optimise_model(shared_model, q_network, loss, sgd, args,
                                  lock)

            # Log the results
            logging.debug('Batch loss: {:.2f}, Buffer size: {}'.format(
                loss, len(buffer)))
            writer.add_scalar('Batch loss', loss, itr)

            if itr % args.target_update_frequency == 0:
                target_network.load_state_dict(q_network.state_dict())

        writer.close()

    except KeyboardInterrupt:
        print('exiting optimiser:{:02}'.format(idx))
Code example #18
def as_tensor(x, dtype=t.float32):
    return t.tensor(x,
                    dtype=dtype,
                    device=Device.get_device(),
                    requires_grad=False)
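All of the PyTorch snippets above rely on a small `Device` helper that exposes `set_device(index)` and a zero-argument `get_device()`; the StaDynA examples (#2, #3, #11 and #12) use an unrelated `Device` abstraction from that project, whose `get_device` takes an Android device identifier instead. For the PyTorch usage, a minimal sketch of what such a helper could look like, offered as an assumption rather than the projects' actual code, is:

import torch as t


class Device:
    # Hypothetical sketch of the process-wide device holder used by the
    # PyTorch snippets above; the real implementation may differ.
    _device = t.device('cpu')

    @staticmethod
    def set_device(index):
        # Pin this process to a specific CUDA device, if one is available.
        if t.cuda.is_available():
            Device._device = t.device('cuda:{}'.format(index))

    @staticmethod
    def get_device():
        # Return the device selected for this process (CPU by default).
        return Device._device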