def __init__(self) -> None:
    """Assemble the "Single Expert" group around one ExpertFlockNode.

    The group's ``inputs.data`` and ``inputs.reward`` are routed to the
    expert's ``inputs.sp.data_input`` and ``inputs.tp.reward_input``. The
    expert's ``outputs.sp.predicted_reconstructed_input`` is exposed as the
    group's ``predicted_reconstructed_input`` output.
    """
    super().__init__("Single Expert")

    params = ExpertParams()
    params.flock_size = 1
    params.n_cluster_centers = 24
    params.produce_actions = True

    # Settings that live on the nested ``temporal`` parameter object.
    temporal = params.temporal
    temporal.seq_length = 9
    temporal.seq_lookahead = 7
    temporal.n_frequent_seqs = 700
    temporal.max_encountered_seqs = 1000
    temporal.exploration_probability = 0.01
    temporal.batch_size = 200
    temporal.own_rewards_weight = 20
    temporal.compute_backward_pass = True

    params.compute_reconstruction = True

    expert = ExpertFlockNode(params)
    self.add_node(expert)

    # (source, target) pairs, connected in the listed order.
    wiring = (
        (self.inputs.data.output, expert.inputs.sp.data_input),
        (self.inputs.reward.output, expert.inputs.tp.reward_input),
        (expert.outputs.sp.predicted_reconstructed_input,
         self.outputs.predicted_reconstructed_input.input),
    )
    for source, target in wiring:
        Connector.connect(source, target)
def __init__(self, c_n_ccs, c_buffer_size, c_seq_length, c_seq_lookahead,
             p_seq_length, p_seq_lookahead, p_n_ccs, flock_size):
    """Assemble the "Two Experts" group: two ExpertFlockNodes wired in a loop.

    Args:
        c_n_ccs: ``n_cluster_centers`` of the first expert.
        c_buffer_size: ``spatial.buffer_size`` of the first expert.
        c_seq_length: ``temporal.seq_length`` of the first expert.
        c_seq_lookahead: ``temporal.seq_lookahead`` of the first expert.
        p_seq_length: ``temporal.seq_length`` of the second expert.
        p_seq_lookahead: ``temporal.seq_lookahead`` of the second expert.
        p_n_ccs: ``n_cluster_centers`` of the second expert.
        flock_size: ``flock_size`` of the first expert; the second expert
            gets it through the cloned parameters.
    """
    super().__init__("Two Experts")

    first_params = ExpertParams()
    first_params.flock_size = flock_size
    first_params.n_cluster_centers = c_n_ccs
    first_params.produce_actions = True

    first_temporal = first_params.temporal
    first_temporal.seq_length = c_seq_length
    first_temporal.seq_lookahead = c_seq_lookahead
    first_temporal.n_frequent_seqs = 700
    first_temporal.max_encountered_seqs = 1000
    first_temporal.exploration_probability = 0.05
    first_temporal.batch_size = 200
    first_temporal.compute_backward_pass = True
    first_temporal.frustration_threshold = 2

    # The clone is taken *before* buffer_size and compute_reconstruction are
    # set on the first params, so those two assignments are not copied to
    # the second params from here.
    second_params = first_params.clone()

    first_params.spatial.buffer_size = c_buffer_size
    first_params.compute_reconstruction = True

    # Overrides that distinguish the second expert from the first.
    second_temporal = second_params.temporal
    second_temporal.seq_length = p_seq_length
    second_temporal.seq_lookahead = p_seq_lookahead
    second_params.n_cluster_centers = p_n_ccs
    second_params.produce_actions = False
    second_temporal.frustration_threshold = 10

    first_expert = ExpertFlockNode(first_params)
    second_expert = ExpertFlockNode(second_params)
    for node in (first_expert, second_expert):
        self.add_node(node)

    # Group inputs: data goes to the first expert only, reward to both.
    Connector.connect(self.inputs.data.output,
                      first_expert.inputs.sp.data_input)
    group_reward = self.inputs.reward.output
    Connector.connect(group_reward, first_expert.inputs.tp.reward_input)
    Connector.connect(group_reward, second_expert.inputs.tp.reward_input)

    # Connect the experts to each other: projections go forward, and the
    # second expert's context returns over a backward connection.
    Connector.connect(first_expert.outputs.tp.projection_outputs,
                      second_expert.inputs.sp.data_input)
    Connector.connect(second_expert.outputs.output_context,
                      first_expert.inputs.tp.context_input,
                      is_backward=True)

    # Connect the group output.
    Connector.connect(first_expert.outputs.sp.predicted_reconstructed_input,
                      self.outputs.predicted_reconstructed_input.input)
def __init__(self) -> None:
    """Wire a GridWorld ('MapE') loop around one expert with a fixed context.

    Data path: grid world ``egocentric_image_action`` -> noise -> unsqueeze
    -> expert ``sp.data_input``. A constant block passes through a lambda
    that overwrites part of it with NaN and is fed to the expert's
    ``tp.context_input``. The expert's predicted reconstructed input is
    sliced to four values, one-hot encoded, passed through the action
    monitor and returned to the grid world over a backward connection.
    """
    super().__init__(device='cuda')

    action_monitor = ActionMonitorNode(GridWorldActionDescriptor())
    world_params = GridWorldParams(map_name='MapE')
    noise_settings = RandomNoiseParams(amplitude=0.0001)
    grid_world = GridWorldNode(world_params)
    params = ExpertParams()
    unsqueeze = UnsqueezeNode(dim=0)
    noise = RandomNoiseNode(noise_settings)
    constant = ConstantNode(shape=(1, 1, 3, 48))
    to_one_hot = ToOneHotNode()

    def mask_context(inputs, outputs):
        """Set ``[:, :, 1:, 24:]`` of the incoming block to NaN and emit it."""
        block = inputs[0]
        # NOTE: the slice assignment writes into inputs[0] itself, not a copy.
        block[:, :, 1:, 24:] = float('nan')
        outputs[0].copy_(block)

    def parse_action(inputs, outputs):
        """Emit the first four values of the last row, offset by SMALL_CONSTANT."""
        outputs[0].copy_(inputs[0][0, -1, :4] + SMALL_CONSTANT)

    action_parser = LambdaNode(func=parse_action, n_inputs=1,
                               output_shapes=[(4,)])
    context_assembler = LambdaNode(func=mask_context, n_inputs=1,
                                   output_shapes=[(1, 1, 3, 48)])

    params.flock_size = 1
    params.n_cluster_centers = 24
    params.produce_actions = True

    temporal = params.temporal
    temporal.seq_length = 9
    temporal.seq_lookahead = 7
    temporal.n_frequent_seqs = 700
    temporal.max_encountered_seqs = 1000
    temporal.exploration_probability = 0.01
    temporal.batch_size = 200
    temporal.own_rewards_weight = 20
    # Same value as the last dimension of the constant context block above.
    temporal.incoming_context_size = 48

    params.compute_reconstruction = True

    # Alternative left disabled in the original: ConvExpertFlockNode(params).
    expert = ExpertFlockNode(params)

    for node in (grid_world, action_monitor, expert, unsqueeze,
                 action_parser, noise, constant, context_assembler,
                 to_one_hot):
        self.add_node(node)

    # Forward (source, target) pairs, connected in the listed order.
    forward_wiring = (
        (grid_world.outputs.egocentric_image_action, noise.inputs.input),
        (noise.outputs.output, unsqueeze.inputs.input),
        (unsqueeze.outputs.output, expert.inputs.sp.data_input),
        (grid_world.outputs.reward, expert.inputs.tp.reward_input),
        (constant.outputs.output, context_assembler.inputs[0]),
        (context_assembler.outputs[0], expert.inputs.tp.context_input),
        (expert.outputs.sp.predicted_reconstructed_input,
         action_parser.inputs[0]),
        (action_parser.outputs[0], to_one_hot.inputs.input),
        (to_one_hot.outputs.output, action_monitor.inputs.action_in),
    )
    for source, target in forward_wiring:
        Connector.connect(source, target)

    # The chosen action closes the loop over a backward connection.
    Connector.connect(action_monitor.outputs.action_out,
                      grid_world.inputs.agent_action,
                      is_backward=True)
def __init__(self) -> None:
    """Wire a GridWorld ('MapTwoRoom') loop around one ConvExpertFlockNode.

    Data path: grid world ``egocentric_image_action`` -> noise -> unsqueeze
    -> expert ``sp.data_input``. The expert's predicted reconstructed input
    is sliced to four values, one-hot encoded, passed through the action
    monitor and returned to the grid world over a backward connection.
    """
    super().__init__(device='cuda')

    action_monitor = ActionMonitorNode(GridWorldActionDescriptor())
    world_params = GridWorldParams(map_name='MapTwoRoom',
                                   reset_strategy=ResetStrategy.ANYWHERE)
    noise_settings = RandomNoiseParams(amplitude=0.0001)
    grid_world = GridWorldNode(world_params)
    params = ExpertParams()
    unsqueeze = UnsqueezeNode(dim=0)
    noise = RandomNoiseNode(noise_settings)
    to_one_hot = ToOneHotNode()

    def parse_action(inputs, outputs):
        """Emit the first four values of the last row, offset by SMALL_CONSTANT."""
        outputs[0].copy_(inputs[0][0, -1, :4] + SMALL_CONSTANT)

    action_parser = LambdaNode(func=parse_action, n_inputs=1,
                               output_shapes=[(4, )])

    params.flock_size = 1
    params.n_cluster_centers = 64
    params.produce_actions = True

    temporal = params.temporal
    temporal.seq_length = 17
    temporal.seq_lookahead = 13
    temporal.n_frequent_seqs = 700
    temporal.max_encountered_seqs = 1000
    temporal.exploration_probability = 0.05
    temporal.batch_size = 200
    temporal.buffer_size = 1000
    temporal.own_rewards_weight = 20
    temporal.frustration_threshold = 2
    temporal.compute_backward_pass = True

    params.compute_reconstruction = True

    # Alternative left disabled in the original: ExpertFlockNode(params).
    expert = ConvExpertFlockNode(params)

    for node in (grid_world, action_monitor, expert, unsqueeze,
                 action_parser, noise, to_one_hot):
        self.add_node(node)

    # Forward (source, target) pairs, connected in the listed order.
    forward_wiring = (
        (grid_world.outputs.egocentric_image_action, noise.inputs.input),
        (noise.outputs.output, unsqueeze.inputs.input),
        (unsqueeze.outputs.output, expert.inputs.sp.data_input),
        (grid_world.outputs.reward, expert.inputs.tp.reward_input),
        (expert.outputs.sp.predicted_reconstructed_input,
         action_parser.inputs[0]),
        (action_parser.outputs[0], to_one_hot.inputs.input),
        (to_one_hot.outputs.output, action_monitor.inputs.action_in),
    )
    for source, target in forward_wiring:
        Connector.connect(source, target)

    # The chosen action closes the loop over a backward connection.
    Connector.connect(action_monitor.outputs.action_out,
                      grid_world.inputs.agent_action,
                      is_backward=True)
def __init__(self) -> None:
    """Wire a GridWorld ('MapE') loop around two stacked ExpertFlockNodes.

    The first expert receives the noised, unsqueezed grid-world image and
    produces actions. The second expert consumes the first expert's
    ``tp.projection_outputs`` and returns its ``output_context`` to the
    first expert over a backward connection. Both experts receive the
    grid-world reward.
    """
    super().__init__(device='cuda')

    action_monitor = ActionMonitorNode(GridWorldActionDescriptor())
    world_params = GridWorldParams(map_name='MapE')
    noise_settings = RandomNoiseParams(amplitude=0.0001)
    grid_world = GridWorldNode(world_params)
    first_params = ExpertParams()
    unsqueeze = UnsqueezeNode(dim=0)
    noise = RandomNoiseNode(noise_settings)
    to_one_hot = ToOneHotNode()

    def parse_action(inputs, outputs):
        """Emit the first four values of the last row, offset by SMALL_CONSTANT."""
        outputs[0].copy_(inputs[0][0, -1, :4] + SMALL_CONSTANT)

    action_parser = LambdaNode(func=parse_action, n_inputs=1,
                               output_shapes=[(4, )])

    first_params.flock_size = 1
    first_params.n_cluster_centers = 24
    first_params.produce_actions = True

    first_temporal = first_params.temporal
    first_temporal.seq_length = 4
    first_temporal.seq_lookahead = 2
    first_temporal.n_frequent_seqs = 700
    first_temporal.max_encountered_seqs = 1000
    first_temporal.exploration_probability = 0.05
    first_temporal.batch_size = 200
    first_temporal.frustration_threshold = 2
    # Left disabled in the original: temporal.own_rewards_weight = 20

    first_params.compute_reconstruction = True

    # The second expert starts from a copy of the first expert's settings
    # and then overrides the fields below.
    second_params = first_params.clone()
    second_temporal = second_params.temporal
    second_temporal.seq_length = 5
    second_temporal.seq_lookahead = 4
    second_params.n_cluster_centers = 8
    second_params.produce_actions = False
    second_temporal.frustration_threshold = 10
    # Left disabled in the original:
    # first temporal.incoming_context_size = 2 * second n_cluster_centers

    first_expert = ExpertFlockNode(first_params)
    second_expert = ExpertFlockNode(second_params)

    for node in (grid_world, action_monitor, first_expert, second_expert,
                 unsqueeze, action_parser, noise, to_one_hot):
        self.add_node(node)

    # NOTE(review): this reads `output_image_action`, whereas the sibling
    # constructors read `egocentric_image_action` - confirm this is intended.
    Connector.connect(grid_world.outputs.output_image_action,
                      noise.inputs.input)
    Connector.connect(noise.outputs.output, unsqueeze.inputs.input)
    Connector.connect(unsqueeze.outputs.output,
                      first_expert.inputs.sp.data_input)

    # Expert-to-expert loop: projections go forward, context comes back.
    Connector.connect(first_expert.outputs.tp.projection_outputs,
                      second_expert.inputs.sp.data_input)
    Connector.connect(second_expert.outputs.output_context,
                      first_expert.inputs.tp.context_input,
                      is_backward=True)

    Connector.connect(first_expert.outputs.sp.predicted_reconstructed_input,
                      action_parser.inputs[0])

    # Both experts receive the same reward signal.
    world_reward = grid_world.outputs.reward
    Connector.connect(world_reward, first_expert.inputs.tp.reward_input)
    Connector.connect(world_reward, second_expert.inputs.tp.reward_input)

    # Action path back into the environment.
    Connector.connect(action_parser.outputs[0], to_one_hot.inputs.input)
    Connector.connect(to_one_hot.outputs.output,
                      action_monitor.inputs.action_in)
    Connector.connect(action_monitor.outputs.action_out,
                      grid_world.inputs.agent_action,
                      is_backward=True)