Example #1
    def __init__(self, params: DatasetAlphabetParams, seed: Optional[int] = None, n_worlds: int = 1,
                 name: str = "DatasetAlphabet"):
        super().__init__(name=name, outputs=MultiDatasetAlphabetOutputs(self),
                         memory_blocks=MultiDatasetAlphabetInternals(self))
        self._params = params.clone()
        self._seed = seed
        self._n_worlds = n_worlds
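For orientation, a hypothetical instantiation of this constructor; the seed value is illustrative, and the parameter style follows the later examples:

    node = MultiDatasetAlphabetNode(
        DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abc'])),
        seed=42,      # illustrative, not from the source
        n_worlds=4)   # run four independent copies of the dataset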
Example #2
class TestMultiDatasetAlphabetNode:
    @pytest.mark.parametrize('params, should_pass', [
        (DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abc'])
        ), True),
        (DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abc', 'ae'])
        ), False)
    ])
    def test_validate_params_sequence_probs_validate_throws(self, params, should_pass):
        node = MultiDatasetAlphabetNode(params)
        if should_pass:
            node.validate()
        else:
            with raises(NodeValidationException):
                node.validate()
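The second parameter set is expected to fail validation: the sequence 'ae' contains the symbol 'e', which is not present in symbols="abcd", so validate() raises NodeValidationException.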
Example #3
    def test_create_node(self, device):
        unit = DatasetAlphabetUnit(
            AllocatingCreator(device),
            DatasetAlphabetParams(
                symbols="abcd",
                sequence_probs=DatasetAlphabetSequenceProbsModeParams(
                    seqs=['abcd'])))
        # Four symbols, each rendered as a 7x5 grid; the output holds one symbol.
        assert [4, 7, 5] == list(unit.all_symbols.shape)
        assert [7, 5] == list(unit.output_data.shape)
Example #4
    def test_mode_sequence_probs(self, device):
        unit = DatasetAlphabetUnit(
            AllocatingCreator(device),
            DatasetAlphabetParams(
                symbols="abcd",
                sequence_probs=DatasetAlphabetSequenceProbsModeParams(
                    seqs=['abc'])))
        generator = self.label_generator(unit)
        result = [next(generator) for _ in range(7)]
        assert [0, 1, 2, 0, 1, 2, 0] == result
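The label_generator helper is not part of this excerpt. A minimal sketch of what it could look like, assuming the unit exposes a step() method and the output_label tensor seen in Example #6 (both are assumptions here):

    def label_generator(self, unit):
        # Hypothetical helper: advance the dataset one symbol per step
        # and yield the integer label of the currently shown symbol.
        while True:
            unit.step()
            yield int(unit.output_label.item())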
Example #5
    def __init__(self):
        super().__init__(device='cuda')

        dataset_params = DatasetAlphabetParams(symbols="abcd123456789", padding_right=1,
                                               sequence_probs=DatasetAlphabetSequenceProbsModeParams(
                                                   seqs=['abc', '123', '456'],
                                               ))

        dataset = MultiDatasetAlphabetNode(dataset_params, n_worlds=4)

        self.add_node(dataset)
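This topology runs four copies of the dataset in parallel (n_worlds=4), each cycling through the sequences 'abc', '123' and '456'; padding_right=1 presumably pads each rendered symbol with one extra column on the right.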
Example #6
    def __init__(self, creator: TensorCreator, params: DatasetAlphabetParams, n_worlds: int,
                 random: Optional[RandomState] = None):
        super().__init__(creator.device)
        self._validate_params(params)
        self.n_worlds = n_worlds

        self._units = [DatasetAlphabetUnit(creator, params.clone(), random) for _ in range(self.n_worlds)]

        def stacked(tensor):
            # Allocate a zeroed tensor shaped like `tensor`, with an extra
            # leading n_worlds dimension (one slice per world).
            size = [self.n_worlds] + list(tensor.shape)
            return creator.zeros(size, dtype=tensor.dtype, device=tensor.device)

        self._first_unit = self._units[0]

        # Create output tensors
        self.output_data = stacked(self._first_unit.output_data)
        self.output_label = stacked(self._first_unit.output_label)
        self.output_sequence_id = stacked(self._first_unit.output_sequence_id)
        self.output_sequence_id_one_hot = stacked(self._first_unit.output_sequence_id_one_hot)
        self.all_symbols = self._first_unit.all_symbols
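The copying of per-unit outputs into these stacked tensors is not shown in the excerpt. A plausible step() counterpart, again assuming each DatasetAlphabetUnit exposes a step() method:

    def step(self):
        for i, unit in enumerate(self._units):
            unit.step()  # advance this world's dataset by one symbol
            self.output_data[i].copy_(unit.output_data)
            self.output_label[i].copy_(unit.output_label)
            self.output_sequence_id[i].copy_(unit.output_sequence_id)
            self.output_sequence_id_one_hot[i].copy_(unit.output_sequence_id_one_hot)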
Example #7
    def create_node_dataset(self):
        def generate_sequence(symbols: str, count: int, skip: int):
            """Generate sequence of length `count` and stride `skip` from `symbols` - symbols are repeated when needed"""
            repeating_symbols = itertools.cycle(symbols)
            result = []
            for i in range(count):
                result.append(next(repeating_symbols))
                for _ in range(skip):
                    next(repeating_symbols)
            return ''.join(result)

        def count_unique_symbols(sequences: List[str]) -> int:
            symbols = set()
            for seq in sequences:
                for symbol in seq:
                    symbols.add(symbol)
            return len(symbols)

        seqs = [
            generate_sequence(self._params.symbols,
                              self._params.seq_length, i)
            for i in range(self._params.seq_count)
        ]

        # duplicate sequences
        for s in list(seqs):
            seqs.append(s)
        # make sequences longer (repeat)
        seqs = [s * self._params.seq_repeat for s in seqs]

        dataset_params = DatasetAlphabetParams(
            symbols=self._params.symbols,
            padding_right=1,
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=seqs))
        return MultiDatasetAlphabetNode(dataset_params,
                                        n_worlds=self._params.flock_size)
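As a worked example of the nested helper: generate_sequence("abcd", count=5, skip=1) walks the repeating cycle a b c d a b c d ..., keeping every second symbol: a (skip b) c (skip d) a (skip b) c (skip d) a, i.e. "acaca". The duplication loop then doubles the list to 2 * seq_count sequences, and seq_repeat concatenates each sequence with itself that many times.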
Example #8
    def test_create_node(self, device):
        unit = MultiDatasetAlphabetUnit(AllocatingCreator(device), DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abcd'])
        ), n_worlds=self.n_worlds)
        assert [self.n_worlds, 7, 5] == list(unit.output_data.shape)
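Compare with Example #3: the per-world output of shape [7, 5] gains a leading n_worlds dimension once the units are stacked by MultiDatasetAlphabetUnit.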
Example #9
    def topology_dataset_test(self):
        dataset_params = DatasetAlphabetParams(
            symbols="abcd123456789",
            padding_right=1,
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(
                seqs=['abc', '123', '456789', '468'],
                # transition_probs=[[0.7, 0.3], [0.3, 0.7], ]
            ))
        dataset_node = DatasetAlphabetNode(dataset_params)
        flock_size = 1
        parent_cluster_centers = 20  # could also be len(dataset_params.sequence_probs.seqs)

        unsqueeze_node_child = UnsqueezeNode(0)
        unsqueeze_node_sequence_id = UnsqueezeNode(0)
        expand_node_child = ExpandNode(0, flock_size)
        expand_node_sequence_id = ExpandNode(0, flock_size)

        child_cluster_centers = len(dataset_params.symbols) - 1
        expert_node_child = ExpertFlockNode(
            ExpertParams(flock_size=flock_size,
                         n_cluster_centers=child_cluster_centers,
                         spatial=SpatialPoolerParams(),
                         temporal=TemporalPoolerParams(
                             incoming_context_size=parent_cluster_centers,
                             n_providers=2,
                             n_frequent_seqs=50,
                             seq_length=3,
                             seq_lookahead=1)))

        expert_node_parent = ExpertFlockNode(
            ExpertParams(flock_size=flock_size,
                         n_cluster_centers=parent_cluster_centers,
                         spatial=SpatialPoolerParams(),
                         temporal=TemporalPoolerParams(incoming_context_size=4,
                                                       n_frequent_seqs=50,
                                                       seq_length=3,
                                                       seq_lookahead=1)))

        expert_node_sequence_id = ExpertFlockNode(
            ExpertParams(flock_size=flock_size,
                         n_cluster_centers=2,
                         spatial=SpatialPoolerParams()))

        self.add_node(dataset_node)
        self.add_node(unsqueeze_node_child)
        # self.add_node(unsqueeze_node_sequence_id)
        self.add_node(expand_node_child)
        # self.add_node(expand_node_sequence_id)
        self.add_node(expert_node_child)
        self.add_node(expert_node_parent)
        # self.add_node(expert_node_sequence_id)

        Connector.connect(dataset_node.outputs.output,
                          unsqueeze_node_child.inputs.input)
        Connector.connect(unsqueeze_node_child.outputs.output,
                          expand_node_child.inputs.input)
        Connector.connect(expand_node_child.outputs.output,
                          expert_node_child.inputs.sp.data_input)
        # Connector.connect(dataset_node.outputs.sequence_id, unsqueeze_node_sequence_id.inputs.input)
        # Connector.connect(unsqueeze_node_sequence_id.outputs.output, expand_node_sequence_id.inputs.input)
        # Connector.connect(expand_node_sequence_id.outputs.output, expert_node_sequence_id.inputs.sp.data_input)

        Connector.connect(expert_node_child.outputs.tp.projection_outputs,
                          expert_node_parent.inputs.sp.data_input)
        # Parent context
        Connector.connect(expert_node_parent.outputs.output_context,
                          expert_node_child.inputs.tp.context_input,
                          is_backward=True)
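To summarize the wiring: the dataset output passes through UnsqueezeNode and ExpandNode (adding and replicating a flock dimension) into the child expert; the child's temporal-pooler projection feeds the parent expert, and the parent's context output is routed back to the child's context input. The is_backward=True flag presumably marks this edge as feedback, so the loop is evaluated with the previous step's value instead of forming a cycle.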