def __init__(self, seed: int = None):
    """Build the topology: SE navigation dataset -> expand node -> spatial pooler flock.

    Args:
        seed: Seed handed to the dataset node, to the spatial pooler node and,
            once the graph is wired, to ``set_global_seeds``.
    """
    super().__init__('cuda')

    # Dataset configuration: SIZE_24 dataset, samples taken in order.
    self.se_world_params = DatasetSENavigationParams(
        dataset_size=SeDatasetSize.SIZE_24)
    world_params = self.se_world_params
    world_params.sampling_method = SamplingMethod.ORDERED

    # Spatial pooler configuration.
    self.sp_params = ExpertParams()
    flock_params = self.sp_params
    flock_params.n_cluster_centers = 10

    # The pooler input size is the product of the first two dataset
    # dimensions and the number of channels.
    dims = world_params.dataset_dims
    spatial = flock_params.spatial
    spatial.input_size = dims[0] * dims[1] * DatasetSeBase.N_CHANNELS

    flock_params.flock_size = 3
    spatial.buffer_size = 100
    spatial.batch_size = 45
    spatial.cluster_boost_threshold = 30

    # Instantiate the nodes (same order as they are added to the graph).
    dataset_node = DatasetSeNavigationNode(world_params, seed=seed)
    expander = ExpandNode(dim=0, desired_size=flock_params.flock_size)
    pooler = SpatialPoolerFlockNode(flock_params, seed=seed)

    for node in (dataset_node, expander, pooler):
        self.add_node(node)

    # dataset image -> expand -> SP data input
    Connector.connect(dataset_node.outputs.image_output,
                      expander.inputs.input)
    Connector.connect(expander.outputs.output,
                      pooler.inputs.sp.data_input)

    set_global_seeds(seed)
def make_nnet(input_size: int = 16,
              num_classes: int = 5,
              seed: int = 123,
              buffer_s: int = 16,
              batch_s: int = 8,
              mixed_mode: bool = True):
    """Create an ``NNetNode`` with small sizes (intended for tests) and allocate its memory blocks.

    Args:
        input_size: Edge length of the square input; the input shape is
            ``(3, input_size, input_size)``.
        num_classes: Output size of the network.
        seed: Stored in the params and used for ``set_global_seeds`` right
            before the network is constructed.
        buffer_s: Buffer size of the observation storage.
        batch_s: Batch size stored in the params.
        mixed_mode: Stored in the params; when true the storage is moved to
            ``'cpu'``, otherwise to the local ``device``.

    Returns:
        Tuple ``(params, nnet_node)``.
    """
    device = 'cuda'

    # Start from the default parameters and override what this helper controls.
    params = NNetParams(NNetParams.default_params())
    params.input_shape = (3, input_size, input_size)
    params.output_size = num_classes
    params.seed = seed
    params.batch_size = batch_s
    params.buffer_size = buffer_s
    params.num_epochs = 3
    params.mixed_mode = mixed_mode

    # Layout of the stored observations ('x') and labels ('y').
    observation_types = {
        'x': (params.buffer_size, *params.input_shape),
        'y': (params.buffer_size, params.output_size),
    }

    storage = ObservationStorage(params.buffer_size, observation_types)
    storage_device = 'cpu' if params.mixed_mode else device
    storage.to(storage_device)

    # The global seeds have to be set before the network is created
    # (this happens outside of the node here).
    set_global_seeds(seed=params.seed)

    # Both alternatives evaluate to 'cuda' because `device` is 'cuda' above.
    network_device = 'cuda' if params.mixed_mode else device
    network = NNet(input_shape=params.input_shape,
                   output_shape=params.output_size).to(network_device)

    optimizer = optim.Adam(network.parameters(), lr=params.lr)

    nnet_node = NNetNode(network, optimizer, storage, params,
                         name='Neural Network Node')

    nnet_node.allocate_memory_blocks(AllocatingCreator(device=device))

    return params, nnet_node
def __init__(self,
             seed: int = 0,
             device: str = 'cuda',
             eox: int = 2,
             eoy: int = 2,
             num_cc=30,
             batch_s=150):
    """Build the topology: SE dataset -> receptive field -> spatial pooler, plus an RGB debug node.

    Args:
        seed: Seed given to the dataset node, the spatial pooler node and
            ``set_global_seeds``.
        device: Not referenced inside this constructor.
        eox: Forwarded to the parent constructor (first argument).
        eoy: Forwarded to the parent constructor (second argument).
        num_cc: Passed to ``setup_flock_params`` as ``no_clusters``.
        batch_s: Batch size of the flock; the buffer size is twice this value.
    """
    super().__init__(eox, eoy)

    # Derive the model parameters from the dataset description.
    (se_world_params,
     self._sy,
     self._sx,
     self._no_channels) = init_se_dataset_world_params(random_order=False)

    # The same five values parametrise both helper computations below.
    geometry = (self._sy, self._sx, self._no_channels, self._eoy, self._eox)

    flock_size, input_size = compute_flock_sizes(*geometry)

    expert_params = setup_flock_params(no_clusters=num_cc,
                                       buffer_size=batch_s * 2,
                                       batch_size=batch_s,
                                       tp_learn_period=100,
                                       max_enc_seq=1000,
                                       flock_size=flock_size,
                                       input_size=input_size)

    flock_input_size, flock_output_size = compute_lrf_params(*geometry)

    # Create the nodes.
    self._se_dataset = DatasetSeNavigationNode(se_world_params, seed=seed)
    self._lrf_node = ReceptiveFieldNode(flock_input_size, flock_output_size)
    self._sp_node = SpatialPoolerFlockNode(expert_params, seed=seed)
    # Only used for debugging.
    self._rgb_debug_node = RgbDebugNode(input_dims=flock_input_size,
                                        channel_first=False)

    for node in (self._se_dataset,
                 self._lrf_node,
                 self._sp_node,
                 self._rgb_debug_node):
        self.add_node(node)

    image_output = self._se_dataset.outputs.image_output

    # Dataset -> LRF -> SP
    Connector.connect(image_output, self._lrf_node.inputs[0])
    Connector.connect(self._lrf_node.outputs[0],
                      self._sp_node.inputs.sp.data_input)

    # Dataset -> debug node
    Connector.connect(self._se_dataset.outputs.image_output,
                      self._rgb_debug_node.inputs.input)

    set_global_seeds(seed)
def test_gather_from_dim(capsys):
    """Time ``torch.index_select`` along dimension 1 of a (10, 10, 10) CUDA tensor.

    The indices are the positions where a seeded random vector is below 0.4.
    The measurement runs with pytest's output capturing disabled.

    Args:
        capsys: pytest's ``capsys`` fixture; only ``capsys.disabled()`` is used.
    """

    @measure_time(iterations=200, function_repetitions=1000)
    def measured_function():
        torch.index_select(input_tensor, 1, indices, out=result)

    device = 'cuda'
    float_dtype = get_float(device)

    input_tensor = torch.rand((10, 10, 10), dtype=float_dtype, device=device)

    # Seed only the index selection (the input tensor above is drawn before seeding,
    # exactly as before, so the selected indices stay the same).
    set_global_seeds(1)
    indices = torch.rand(10, dtype=float_dtype, device=device) < 0.4
    indices = indices.nonzero().squeeze(1)

    # Allocate the output with the exact shape index_select produces along dim 1.
    # The previous shape (1,) forced index_select to resize a non-empty `out`
    # tensor, which PyTorch deprecates (and warns about).
    result_shape = (input_tensor.shape[0], indices.numel(), input_tensor.shape[2])
    result = torch.empty(result_shape, dtype=float_dtype, device=device)

    with capsys.disabled():
        measured_function()
def test_node_accessor_and_determinism(device):
    """Check that a seeded ``RandomNumberNode`` yields the validated sequence in three situations.

    The situations are: after the first allocation, after re-allocating the
    memory blocks, and after re-allocating with the global seeds set to ``None``.
    """
    node = RandomNumberNode(lower_bound=LOWER_BOUND,
                            upper_bound=UPPER_BOUND,
                            seed=SEED)

    def reallocate():
        # Each allocation gets its own freshly built creator.
        node.allocate_memory_blocks(AllocatingCreator(device))

    # 1) the expected sequence right after allocation
    reallocate()
    assert generate_and_validate_sequence(node)

    # 2) the same sequence after the memory blocks were re-allocated
    reallocate()
    assert generate_and_validate_sequence(node)

    # 3) the sequence does not depend on the global seeds
    reallocate()
    set_global_seeds(None)
    assert generate_and_validate_sequence(node)
def run_num_steps(num_steps: int,
                  input_tensor: torch.Tensor,
                  input_label: torch.Tensor,
                  nnet_1: NNetNode,
                  nnet_2: NNetNode = None):
    """Step each given network ``num_steps`` times on freshly generated inputs.

    Before each network is processed the global seeds are reset to 123
    (this also happens for a missing ``nnet_2``). The image and label shapes
    are always taken from ``nnet_1``'s params, for both networks.

    Args:
        num_steps: Number of steps per network.
        input_tensor: Tensor overwritten in place with a generated image before every step.
        input_label: Tensor overwritten in place with a generated label before every step.
        nnet_1: First network node; also the source of the input/output shapes.
        nnet_2: Optional second network node.
    """
    for network in (nnet_1, nnet_2):
        set_global_seeds(123)

        if network is None:
            continue

        for _ in range(num_steps):
            image = _random_image(nnet_1._params.input_shape)
            input_tensor.copy_(image)

            label = _random_label(nnet_1._params.output_size)
            input_label.copy_(label)

            network.step()
def _update_memory_blocks(self):
    """Iterate surrogate allocation until the dimensions stop changing, then allocate for real.

    Raises:
        MemoryBlockSizesNotConvergingException: If the dimensions still change
            after ``self._max_block_update_iterations`` measuring passes
            (or if no pass is run at all).
    """
    # Provide default seeds for nodes which do not set the seeds themselves.
    set_global_seeds(self._seed)

    converged = False
    for _ in range(self._max_block_update_iterations):
        self.allocate_memory_blocks(self._measuring_creator)
        if not self.detect_dims_change():
            converged = True
            break

    if not converged:
        # The dimensions never settled, so the model cannot be run.
        raise MemoryBlockSizesNotConvergingException()

    # The memory blocks now hold surrogates whose dimensions no longer change,
    # which makes them usable for the real allocation.
    self.allocate_memory_blocks(self._allocating_creator)

    # Reset the global seeds before the simulation runs.
    set_global_seeds(self._seed)
def test_global_seeds(device):
    """Check ``set_global_seeds``: a fixed seed repeats the draw, ``None`` does not."""

    def draw(seed):
        # Seed globally, then draw one random 5x2 tensor on the tested device.
        set_global_seeds(seed)
        return torch.rand([5, 2], device=device)

    fixed_seed = 345
    first = draw(fixed_seed)
    second = draw(fixed_seed)
    assert same(first, second)

    first = draw(None)
    second = draw(None)
    assert not same(first, second)
def _create_unit(self, creator: TensorCreator) -> ConvSpatialPoolerFlockUnit:
    """Derive the params, set the global seeds from ``self._seed`` and build the unit.

    Args:
        creator: Tensor creator handed to the unit.

    Returns:
        A new ``ConvSpatialPoolerFlockUnit`` built from ``creator`` and ``self.params``.
    """
    self._derive_params()

    # Seeds are set immediately before the unit is constructed.
    set_global_seeds(self._seed)
    unit = ConvSpatialPoolerFlockUnit(creator, self.params)
    return unit
def __init__(self,
             seed: int = 0,
             device: str = 'cuda',
             eox: int = 2,
             eoy: int = 2,
             num_cc: int = 100,
             batch_s=300,
             tp_learn_period=50,
             tp_max_enc_seq=1000,
             se_skip_frames=9):
    """Build the topology: SpaceEngineers connector -> receptive field -> expert flock.

    The connector additionally receives a constant zero action (routed through
    an action monitor), constant zero task data and constant zero task control.

    Args:
        seed: Seed given to the expert flock node and to ``set_global_seeds``.
        device: Not referenced inside this constructor.
        eox: Forwarded to the parent constructor (first argument).
        eoy: Forwarded to the parent constructor (second argument).
        num_cc: Passed to ``setup_flock_params`` as ``no_clusters``.
        batch_s: Batch size of the flock; the buffer size is twice this value.
        tp_learn_period: Passed to ``setup_flock_params`` as ``tp_learn_period``.
        tp_max_enc_seq: Passed to ``setup_flock_params`` as ``max_enc_seq``.
        se_skip_frames: Stored as ``skip_frames`` in the SE connector config.
    """
    super().__init__(eox, eoy)

    # SpaceEngineers connector configuration.
    se_config = SpaceEngineersConnectorConfig()
    se_config.skip_frames = se_skip_frames
    se_config.curriculum = [0, -1]
    self._se_config = se_config

    # Derive the model parameters (the world params themselves are not needed here).
    (_,
     self._sy,
     self._sx,
     self._no_channels) = init_se_dataset_world_params(random_order=False)

    # The same five values parametrise both helper computations below.
    geometry = (self._sy, self._sx, self._no_channels, self._eoy, self._eox)

    flock_size, input_size = compute_flock_sizes(*geometry)

    expert_params = setup_flock_params(no_clusters=num_cc,
                                       buffer_size=batch_s * 2,
                                       batch_size=batch_s,
                                       tp_learn_period=tp_learn_period,
                                       max_enc_seq=tp_max_enc_seq,
                                       flock_size=flock_size,
                                       input_size=input_size)

    flock_input_size, flock_output_size = compute_lrf_params(*geometry)

    # SE-related nodes.
    self._actions_descriptor = SpaceEngineersActionsDescriptor()
    self._node_se_connector = SpaceEngineersConnectorNode(
        self._actions_descriptor, self._se_config)
    self._node_action_monitor = ActionMonitorNode(self._actions_descriptor)
    self._blank_action = ConstantNode(
        shape=self._actions_descriptor.ACTION_COUNT, constant=0)
    self._blank_task_data = ConstantNode(
        shape=self._se_config.agent_to_task_buffer_size, constant=0)

    # Flock-related nodes.
    self._lrf_node = ReceptiveFieldNode(flock_input_size, flock_output_size)
    self._flock_node = ExpertFlockNode(expert_params, seed=seed)
    zero_context_shape = (expert_params.flock_size,
                          NUMBER_OF_CONTEXT_TYPES,
                          expert_params.temporal.incoming_context_size)
    self._zero_context = ConstantNode(shape=zero_context_shape, constant=0)
    self._blank_task_control = ConstantNode(
        shape=self._se_config.TASK_CONTROL_SIZE, constant=0)

    # Register the nodes in the graph.
    for node in (self._lrf_node,
                 self._flock_node,
                 self._zero_context,
                 self._node_se_connector,
                 self._node_action_monitor,
                 self._blank_action,
                 self._blank_task_data,
                 self._blank_task_control):
        self.add_node(node)

    se_connector = self._node_se_connector

    # SE image -> LRF -> SP, zero context -> TP
    Connector.connect(se_connector.outputs.image_output,
                      self._lrf_node.inputs[0])
    Connector.connect(self._lrf_node.outputs[0],
                      self._flock_node.inputs.sp.data_input)
    Connector.connect(self._zero_context.outputs.output,
                      self._flock_node.inputs.tp.context_input)

    # blank (no-op) action -> action monitor -> SE agent action
    Connector.connect(self._blank_action.outputs.output,
                      self._node_action_monitor.inputs.action_in)
    Connector.connect(self._node_action_monitor.outputs.action_out,
                      se_connector.inputs.agent_action)

    # blank task data / task control -> SE auxiliary inputs
    Connector.connect(self._blank_task_data.outputs.output,
                      se_connector.inputs.agent_to_task_label)
    Connector.connect(self._blank_task_control.outputs.output,
                      se_connector.inputs.task_control)

    # Prepare for the run.
    set_global_seeds(seed)
    self._last_step_duration = 0
def _init_seed(self, seed: int):
    """Seed this object's own NumPy random state and the global seeds.

    The seed decides whether the measurements are deterministic across runs.

    Args:
        seed: Value used for ``self.rand`` and for ``set_global_seeds``.
    """
    # Constructing the RandomState with the seed leaves it in the same state
    # as creating it unseeded and seeding it afterwards.
    self.rand = np.random.RandomState(seed)
    set_global_seeds(seed)
def __init__(self,
             num_labels: int,
             buffer_s: int,
             batch_s: int,
             model_seed: int,
             lr: float,
             num_epochs: int,
             image_size=SeDatasetSize.SIZE_24,
             num_channels=3):
    """Initialize the node group containing the NN used as a baseline for Task0.

    Args:
        num_labels: Number of labels in the dataset (20 for the Task0); used as
            the network output size.
        buffer_s: Buffer size of the observation storage.
        batch_s: Batch size of the network node.
        model_seed: Seed used for deterministic experiments.
        lr: Learning rate.
        num_epochs: Stored in the network params as ``num_epochs``.
        image_size: Dataset size member; its ``value`` is used for both spatial
            dimensions of the input shape.
        num_channels: First dimension of the input shape.
    """
    super().__init__("Task 0 - NN Model", inputs=ClassificationTaskInputs(self))

    # Size of the output layer.
    self._num_labels = num_labels

    edge = image_size.value

    # Values coming from the constructor arguments.
    constructor_params = {
        'lr': lr,
        'buffer_size': buffer_s,
        'batch_size': batch_s,
        'seed': model_seed,
        'input_shape': (num_channels, edge, edge),
        'output_size': self._num_labels,
        'num_epochs': num_epochs,
    }

    # Layer the parameters: defaults, then this file's params, then the constructor's.
    self._params = NNetParams(NNetParams.default_params())
    self._params.set_params(_nn_node_params)
    self._params.set_params(constructor_params)

    params = self._params

    # Layout of the stored observations ('x') and labels ('y').
    self._observation_types = {
        'x': (params.buffer_size, *params.input_shape),
        'y': (params.buffer_size, params.output_size),
    }

    # Data storage.
    self._storage = ObservationStorage(params.buffer_size,
                                       self._observation_types)
    storage_device = 'cpu' if params.mixed_mode else self.device
    self._storage.to(storage_device)

    # The global seeds have to be set before the network is created
    # (this happens outside of the node here).
    set_global_seeds(seed=params.seed)

    # Neural network and its optimizer.
    network_device = 'cuda' if params.mixed_mode else self.device
    self._network = NNet(input_shape=params.input_shape,
                         output_shape=params.output_size).to(network_device)
    self._optimizer = optim.Adam(self._network.parameters(), lr=params.lr)

    # The node wrapping the network.
    self._nnet_node = NNetNode(self._network,
                               self._optimizer,
                               self._storage,
                               params,
                               name='Neural Network Node')
    self.add_node(self._nnet_node)

    # Group image input -> network input.
    Connector.connect(self.inputs.image.output,
                      self._nnet_node.inputs.input)
    # Group label input -> network label (targets for learning).
    Connector.connect(self.inputs.label.output,
                      self._nnet_node.inputs.label)

    # Train/test switching is driven by an input: constants 0 and 1 feed a
    # two-input switch whose output goes to the network's testing_phase input.
    self._constant_zero = ConstantNode([1], constant=0, name="zero")
    self._constant_one = ConstantNode([1], constant=1, name="one")
    self._switch_node = SwitchNode(2)

    for node in (self._constant_zero, self._constant_one, self._switch_node):
        self.add_node(node)

    Connector.connect(self._constant_zero.outputs.output,
                      self._switch_node.inputs[0])
    Connector.connect(self._constant_one.outputs.output,
                      self._switch_node.inputs[1])
    Connector.connect(self._switch_node.outputs.output,
                      self._nnet_node.inputs.testing_phase)

    self._is_training = True
def _create_unit(self, creator: TensorCreator) -> ExpertFlockUnit:
    """Derive the params, set the global seeds from ``self._seed`` and build the unit.

    Args:
        creator: Tensor creator handed to the unit.

    Returns:
        A new ``ExpertFlockUnit`` built from ``creator`` and ``self.params``.
    """
    self._derive_params()

    # Seeds are set immediately before the unit is constructed.
    set_global_seeds(self._seed)
    unit = ExpertFlockUnit(creator, self.params)
    return unit
def test_sample_learning_batch_balanced_sampling(self):
    """Check that balanced sampling draws a similar number of points from every cluster.

    The buffer is filled with random inputs and one-hot cluster assignments
    that cycle through the three clusters. After sampling, each sampled item
    is traced back to its buffer position and the cluster assignments at
    those positions are summed per expert; every sum has to lie in [85, 115].
    """
    flock_size = 2
    buffer_size = 1000
    input_size = 5
    n_cluster_centers = 3
    batch_size = 300
    device = 'cpu'
    float_dtype = get_float(device)

    creator = AllocatingCreator(device)
    buffer = SPFlockBuffer(creator, flock_size, buffer_size, input_size,
                           n_cluster_centers)

    set_global_seeds(None)
    buffer.inputs.stored_data.random_()
    buffer.total_data_written.fill_(9999)

    def one_hot_cluster(position):
        # One-hot vector of length 3 with the 1 at index `position % 3`.
        encoding = [0, 0, 0]
        encoding[position % 3] = 1
        return encoding

    # The second expert's assignments are shifted by one position against the first's.
    cluster_centers = [
        [one_hot_cluster(i + shift) for i in range(buffer_size)]
        for shift in (0, 1)
    ]
    buffer.clusters.stored_data = torch.tensor(cluster_centers,
                                               dtype=float_dtype,
                                               device=device)

    out = torch.zeros((flock_size, batch_size, input_size),
                      dtype=float_dtype,
                      device=device)
    buffer.sample_learning_batch(batch_size, out,
                                 sampling_method=SamplingMethod.BALANCED)

    per_item_indices = []
    for item_idx in range(batch_size):
        sampled_item = out[:, item_idx, :].view(flock_size, 1, input_size)
        # Rows of [expert_id, index in the buffer] where the stored data equals the sampled item.
        matches = (buffer.inputs.stored_data == sampled_item).all(dim=2).nonzero()
        # Keep only the buffer index.
        per_item_indices.append(matches[:, 1])

    # Buffer indices of the sampled data points, one row per expert.
    indices = torch.stack(per_item_indices, dim=1)

    # Per expert: sum of the one-hot cluster vectors at the sampled positions.
    per_expert_counts = [
        buffer.clusters.stored_data[expert_id]
        .index_select(dim=0, index=indices[expert_id])
        .sum(dim=0)
        for expert_id in range(flock_size)
    ]
    sampled_clusters = torch.stack(per_expert_counts, dim=0)

    # The counts should be roughly uniform (100 +- 15).
    assert sampled_clusters.min() >= 85
    assert sampled_clusters.max() <= 115
def __init__(self, **kwargs):
    """Build the topology connecting a SpaceEngineers connector node with a neural network node.

    Args:
        **kwargs: Parameter overrides (defined in the GUI), applied on top of
            the defaults and of the params defined in this file.
    """
    super().__init__(device='cpu')
    self._current_step = 0

    # Layer the parameters: defaults, then this file's params, then the GUI's.
    self._params = NNetParams(NNetParams.default_params())
    self._params.set_params(_nn_node_params)
    self._params.set_params(kwargs)

    # SE config and setup.
    self._se_config = SpaceEngineersConnectorConfig()
    self._se_config.curriculum = list(self._params.curriculum)
    self._actions_descriptor = SpaceEngineersActionsDescriptor()

    # SE-specific parameters are derived from the SE config automatically.
    se_specific = {
        'input_shape': (3,
                        self._se_config.render_width,
                        self._se_config.render_height),
        'output_size': self._se_config.agent_to_task_buffer_size,
    }
    self._params.set_params(se_specific)

    params = self._params

    # Layout of the stored observations ('x') and labels ('y').
    self._observation_types = {
        'x': (params.buffer_size, *params.input_shape),
        'y': (params.buffer_size, params.output_size),
    }

    # Data storage.
    self._storage = ObservationStorage(params.buffer_size,
                                       self._observation_types)
    storage_device = 'cpu' if params.mixed_mode else self.device
    self._storage.to(storage_device)

    # The global seeds have to be set before the network is created
    # (this happens outside of the node here).
    set_global_seeds(seed=params.seed)

    # ==================================================
    # NOTE: Replace here with your own architecture.
    #       It needs to be able to take the correct
    #       input and output (shape/size)
    # ==================================================
    network_device = 'cuda' if params.mixed_mode else self.device
    self._network = NNet(input_shape=params.input_shape,
                         output_shape=params.output_size).to(network_device)
    # ==================================================

    # Optimizer of the network.
    self._optimizer = optim.Adam(self._network.parameters(), lr=params.lr)

    # SE node.
    self._se_connector = SpaceEngineersConnectorNode(self._actions_descriptor,
                                                     self._se_config)

    # NNet node.
    self._nnet_node = NNetNode(self._network,
                               self._optimizer,
                               self._storage,
                               params,
                               name='Neural Network Node')

    # Add the nodes to the topology.
    self.add_node(self._nnet_node)
    self.add_node(self._se_connector)

    se_outputs = self._se_connector.outputs
    se_inputs = self._se_connector.inputs
    nnet_outputs = self._nnet_node.outputs
    nnet_inputs = self._nnet_node.inputs

    # SE -> network
    Connector.connect(se_outputs.image_output, nnet_inputs.input)
    Connector.connect(se_outputs.task_to_agent_label, nnet_inputs.label)
    Connector.connect(se_outputs.metadata_testing_phase,
                      nnet_inputs.testing_phase)

    # network -> SE (backward connections)
    Connector.connect(nnet_outputs.output,
                      se_inputs.agent_action,
                      is_backward=True)
    Connector.connect(nnet_outputs.label,
                      se_inputs.agent_to_task_label,
                      is_backward=True)

    # Necessary, but otherwise unused connection.
    # TODO: remove once node is not needing this
    Connector.connect(nnet_outputs.task_control,
                      se_inputs.task_control,
                      is_backward=True)
data[2].copy_(data[1], non_blocking=non_blocking) torch.cuda.synchronize() total_end = time.time() elapsed_time = total_end - total_start print( f"\tIterations per second: {measurement_iterations / elapsed_time:.1f}" ) print(f"\tTotal time: {elapsed_time:.3f}") print(f"\tAssuming float type size: {float_size} B") print(f"Block size: {block_size * float_size} MB") speed_gbps = block_size * float_size * vector_count * measurement_iterations / elapsed_time / 1024.0 print(f"*** Copying speed ***: {speed_gbps:.1f} GB/s\n") def bench(): for block_size_mb in [16, 256]: bench_block_size(block_size_mb) if __name__ == '__main__': set_global_seeds(100) torch.cuda.set_device(0) # os.environ['THC_CACHING_ALLOCATOR'] = '0' torch.set_grad_enabled(False) bench()
def __init__(self,
             run_just_sp: bool = False,
             seed: int = None,
             device: str = 'cuda',
             eox: int = 2,
             eoy: int = 2,
             num_cc: int = 100,
             batch_s=300,
             tp_learn_period=50,
             tp_max_enc_seq=1000):
    """Build the topology: SE dataset -> receptive field -> flock (spatial pooler only, or full expert).

    Args:
        run_just_sp: When true a ``SpatialPoolerFlockNode`` is used and the
            zero context is left unconnected; otherwise an ``ExpertFlockNode``
            is used and the zero context feeds its TP context input.
        seed: Seed given to the dataset node, the flock node and
            ``set_global_seeds``.
        device: Not referenced inside this constructor.
        eox: Forwarded to the parent constructor (first argument).
        eoy: Forwarded to the parent constructor (second argument).
        num_cc: Passed to ``setup_flock_params`` as ``no_clusters``.
        batch_s: Batch size of the flock; the buffer size is twice this value.
        tp_learn_period: Passed to ``setup_flock_params`` as ``tp_learn_period``.
        tp_max_enc_seq: Passed to ``setup_flock_params`` as ``max_enc_seq``.
    """
    super().__init__(eox, eoy)

    # Derive the model parameters from the dataset description.
    (se_world_params,
     self._sy,
     self._sx,
     self._no_channels) = init_se_dataset_world_params(random_order=False)

    # The same five values parametrise both helper computations below.
    geometry = (self._sy, self._sx, self._no_channels, self._eoy, self._eox)

    flock_size, input_size = compute_flock_sizes(*geometry)

    expert_params = setup_flock_params(no_clusters=num_cc,
                                       buffer_size=batch_s * 2,
                                       batch_size=batch_s,
                                       tp_learn_period=tp_learn_period,
                                       max_enc_seq=tp_max_enc_seq,
                                       flock_size=flock_size,
                                       input_size=input_size)

    flock_input_size, flock_output_size = compute_lrf_params(*geometry)

    # Create the nodes.
    self._se_dataset = DatasetSeNavigationNode(se_world_params, seed=seed)
    self._lrf_node = ReceptiveFieldNode(flock_input_size, flock_output_size)

    flock_node_type = SpatialPoolerFlockNode if run_just_sp else ExpertFlockNode
    self._flock_node = flock_node_type(expert_params, seed=seed)

    zero_context_shape = (expert_params.flock_size,
                          expert_params.temporal.n_providers,
                          NUMBER_OF_CONTEXT_TYPES,
                          expert_params.temporal.incoming_context_size)
    self._zero_context = ConstantNode(shape=zero_context_shape, constant=0)

    # Register the nodes in the graph (the zero context is added in both modes).
    for node in (self._se_dataset,
                 self._lrf_node,
                 self._flock_node,
                 self._zero_context):
        self.add_node(node)

    # Dataset -> LRF -> SP
    Connector.connect(self._se_dataset.outputs.image_output,
                      self._lrf_node.inputs[0])
    Connector.connect(self._lrf_node.outputs[0],
                      self._flock_node.inputs.sp.data_input)

    # The context input is only connected when the full expert is used.
    if not run_just_sp:
        Connector.connect(self._zero_context.outputs.output,
                          self._flock_node.inputs.tp.context_input)

    # Prepare for the run.
    set_global_seeds(seed)
    self._last_step_duration = 0
def _create_unit(self, creator: TensorCreator) -> Unit:
    """Set the global seeds from ``self._seed`` and build the temporal pooler unit.

    Args:
        creator: Tensor creator handed to the unit.

    Returns:
        A new ``TemporalPoolerFlockUnit`` built from ``creator`` and ``self.params``.
    """
    # Seeds are set immediately before the unit is constructed.
    set_global_seeds(self._seed)
    unit = TemporalPoolerFlockUnit(creator, self.params)
    return unit