def __init__(self, params: DatasetAlphabetParams, seed: Optional[int] = None, n_worlds: int = 1,
             name: str = "DatasetAlphabet"):
    super().__init__(name=name, outputs=MultiDatasetAlphabetOutputs(self),
                     memory_blocks=MultiDatasetAlphabetInternals(self))
    self._params = params.clone()
    self._seed = seed
    self._n_worlds = n_worlds
class TestMultiDatasetAlphabetNode:
    @pytest.mark.parametrize('params, should_pass', [
        (DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abc'])
        ), True),
        # 'ae' contains 'e', which is not among the symbols, so validation must fail
        (DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abc', 'ae'])
        ), False)
    ])
    def test_validate_params_sequence_probs_validate_throws(self, params, should_pass):
        node = MultiDatasetAlphabetNode(params)
        if should_pass:
            node.validate()
        else:
            with raises(NodeValidationException):
                node.validate()
def test_create_node(self, device):
    unit = DatasetAlphabetUnit(
        AllocatingCreator(device),
        DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abcd'])))
    # 4 symbols, each rendered as a 7x5 bitmap
    assert [4, 7, 5] == list(unit.all_symbols.shape)
    assert [7, 5] == list(unit.output_data.shape)
def test_mode_sequence_probs(self, device):
    unit = DatasetAlphabetUnit(
        AllocatingCreator(device),
        DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abc'])))
    generator = self.label_generator(unit)
    result = [next(generator) for _ in range(7)]
    assert [0, 1, 2, 0, 1, 2, 0] == result
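# The `label_generator` helper used above is not shown in this section. A
# minimal sketch of what it might look like, assuming the unit exposes a
# step() method and a one-element `output_label` tensor (both assumptions,
# not confirmed by the code shown here):
def label_generator(self, unit):
    while True:
        unit.step()  # advance the dataset by one step
        yield int(unit.output_label.item())  # current label as a plain int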
def __init__(self):
    super().__init__(device='cuda')
    dataset_params = DatasetAlphabetParams(
        symbols="abcd123456789",
        padding_right=1,
        sequence_probs=DatasetAlphabetSequenceProbsModeParams(
            seqs=['abc', '123', '456']))
    dataset = MultiDatasetAlphabetNode(dataset_params, n_worlds=4)
    self.add_node(dataset)
def __init__(self, creator: TensorCreator, params: DatasetAlphabetParams, n_worlds: int,
             random: Optional[RandomState] = None):
    super().__init__(creator.device)
    self._validate_params(params)
    self.n_worlds = n_worlds
    self._units = [DatasetAlphabetUnit(creator, params.clone(), random) for _ in range(self.n_worlds)]

    def stacked(tensor):
        size = [self.n_worlds] + list(tensor.shape)
        return creator.zeros(size, dtype=tensor.dtype, device=tensor.device)

    self._first_unit = self._units[0]

    # Create output tensors, stacked along a leading n_worlds dimension
    self.output_data = stacked(self._first_unit.output_data)
    self.output_label = stacked(self._first_unit.output_label)
    self.output_sequence_id = stacked(self._first_unit.output_sequence_id)
    self.output_sequence_id_one_hot = stacked(self._first_unit.output_sequence_id_one_hot)
    self.all_symbols = self._first_unit.all_symbols
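# For illustration, the stacked() helper above can be reproduced standalone
# with plain torch tensors (torch.zeros stands in for creator.zeros here; the
# TensorCreator API itself is not shown in this section):
import torch

def stacked_standalone(tensor: torch.Tensor, n_worlds: int) -> torch.Tensor:
    # Zeroed batch tensor matching the per-world shape, dtype and device
    return torch.zeros([n_worlds] + list(tensor.shape),
                       dtype=tensor.dtype, device=tensor.device)

assert list(stacked_standalone(torch.zeros(7, 5), 4).shape) == [4, 7, 5]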
def create_node_dataset(self):
    def generate_sequence(symbols: str, count: int, skip: int) -> str:
        """Generate a sequence of length `count` with stride `skip` over `symbols`.

        Symbols are cycled (repeated) when needed.
        """
        repeating_symbols = itertools.cycle(symbols)
        result = []
        for _ in range(count):
            result.append(next(repeating_symbols))
            for _ in range(skip):
                next(repeating_symbols)
        return ''.join(result)

    def count_unique_symbols(sequences: List[str]) -> int:
        symbols = set()
        for seq in sequences:
            for symbol in seq:
                symbols.add(symbol)
        return len(symbols)

    # A different stride per sequence yields seq_count distinct sequences
    seqs = [
        generate_sequence(self._params.symbols, self._params.seq_length, i)
        for i in range(self._params.seq_count)
    ]
    # Duplicate the sequences
    for s in list(seqs):
        seqs.append(s)
    # Make the sequences longer (repeat each one seq_repeat times)
    seqs = [s * self._params.seq_repeat for s in seqs]
    dataset_params = DatasetAlphabetParams(
        symbols=self._params.symbols,
        padding_right=1,
        sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=seqs))
    return MultiDatasetAlphabetNode(dataset_params, n_worlds=self._params.flock_size)
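# Standalone illustration of generate_sequence (mirrors the helper above and
# is runnable on its own): with skip=N, every (N + 1)-th symbol of the cycled
# alphabet is taken, so each stride produces a distinct sequence.
import itertools

def generate_sequence_demo(symbols: str, count: int, skip: int) -> str:
    repeating_symbols = itertools.cycle(symbols)
    result = []
    for _ in range(count):
        result.append(next(repeating_symbols))
        for _ in range(skip):
            next(repeating_symbols)
    return ''.join(result)

assert generate_sequence_demo("abcd", 4, 0) == "abcd"
assert generate_sequence_demo("abcd", 4, 1) == "acac"
assert generate_sequence_demo("abcd", 4, 2) == "adcb"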
def test_create_node(self, device):
    unit = MultiDatasetAlphabetUnit(
        AllocatingCreator(device),
        DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abcd'])),
        n_worlds=self.n_worlds)
    assert [self.n_worlds, 7, 5] == list(unit.output_data.shape)
def topology_dataset_test(self):
    dataset_params = DatasetAlphabetParams(
        symbols="abcd123456789",
        padding_right=1,
        sequence_probs=DatasetAlphabetSequenceProbsModeParams(
            seqs=['abc', '123', '456789', '468'],
            # transition_probs=[[0.7, 0.3], [0.3, 0.7]]
        ))
    dataset_node = DatasetAlphabetNode(dataset_params)
    flock_size = 1
    parent_cluster_centers = 20  # len(dataset_params.sequence_probs.seqs)
    unsqueeze_node_child = UnsqueezeNode(0)
    unsqueeze_node_sequence_id = UnsqueezeNode(0)
    expand_node_child = ExpandNode(0, flock_size)
    expand_node_sequence_id = ExpandNode(0, flock_size)
    child_cluster_centers = len(dataset_params.symbols) - 1

    expert_node_child = ExpertFlockNode(
        ExpertParams(flock_size=flock_size,
                     n_cluster_centers=child_cluster_centers,
                     spatial=SpatialPoolerParams(),
                     temporal=TemporalPoolerParams(
                         incoming_context_size=parent_cluster_centers,
                         n_providers=2,
                         n_frequent_seqs=50,
                         seq_length=3,
                         seq_lookahead=1)))

    expert_node_parent = ExpertFlockNode(
        ExpertParams(flock_size=flock_size,
                     n_cluster_centers=parent_cluster_centers,
                     spatial=SpatialPoolerParams(),
                     temporal=TemporalPoolerParams(
                         incoming_context_size=4,
                         n_frequent_seqs=50,
                         seq_length=3,
                         seq_lookahead=1)))

    expert_node_sequence_id = ExpertFlockNode(
        ExpertParams(flock_size=flock_size,
                     n_cluster_centers=2,
                     spatial=SpatialPoolerParams()))

    self.add_node(dataset_node)
    self.add_node(unsqueeze_node_child)
    # self.add_node(unsqueeze_node_sequence_id)
    self.add_node(expand_node_child)
    # self.add_node(expand_node_sequence_id)
    self.add_node(expert_node_child)
    self.add_node(expert_node_parent)
    # self.add_node(expert_node_sequence_id)

    Connector.connect(dataset_node.outputs.output,
                      unsqueeze_node_child.inputs.input)
    Connector.connect(unsqueeze_node_child.outputs.output,
                      expand_node_child.inputs.input)
    Connector.connect(expand_node_child.outputs.output,
                      expert_node_child.inputs.sp.data_input)
    # Connector.connect(dataset_node.outputs.sequence_id,
    #                   unsqueeze_node_sequence_id.inputs.input)
    # Connector.connect(unsqueeze_node_sequence_id.outputs.output,
    #                   expand_node_sequence_id.inputs.input)
    # Connector.connect(expand_node_sequence_id.outputs.output,
    #                   expert_node_sequence_id.inputs.sp.data_input)
    Connector.connect(expert_node_child.outputs.tp.projection_outputs,
                      expert_node_parent.inputs.sp.data_input)
    # Parent context flows back down to the child expert
    Connector.connect(expert_node_parent.outputs.output_context,
                      expert_node_child.inputs.tp.context_input,
                      is_backward=True)