Ejemplo n.º 1
0
class TestMultiDatasetAlphabetNode:
    @pytest.mark.parametrize('params, should_pass', [
        (DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abc'])
        ), True),
        (DatasetAlphabetParams(
            symbols="abcd",
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abc', 'ae'])
        ), False)
    ])
    def test_validate_params_sequence_probs_validate_throws(self, params, should_pass):
        """Validation passes when all sequence symbols are in the alphabet,
        and raises NodeValidationException otherwise (here 'e' is not in "abcd")."""
        node = MultiDatasetAlphabetNode(params)
        if not should_pass:
            with raises(NodeValidationException):
                node.validate()
            return
        node.validate()
Ejemplo n.º 2
0
 def test_create_node(self, device):
     """Unit built from a 4-symbol alphabet exposes the expected tensor shapes."""
     params = DatasetAlphabetParams(
         symbols="abcd",
         sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abcd']))
     unit = DatasetAlphabetUnit(AllocatingCreator(device), params)
     assert list(unit.all_symbols.shape) == [4, 7, 5]
     assert list(unit.output_data.shape) == [7, 5]
Ejemplo n.º 3
0
 def test_mode_sequence_probs(self, device):
     """In sequence-probs mode the labels cycle through the sequence 'abc'."""
     params = DatasetAlphabetParams(
         symbols="abcd",
         sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abc']))
     unit = DatasetAlphabetUnit(AllocatingCreator(device), params)
     labels = self.label_generator(unit)
     observed = [next(labels) for _ in range(7)]
     assert observed == [0, 1, 2, 0, 1, 2, 0]
Ejemplo n.º 4
0
    def __init__(self):
        """Build the topology: a 4-world multi-alphabet dataset node on CUDA."""
        super().__init__(device='cuda')

        params = DatasetAlphabetParams(
            symbols="abcd123456789",
            padding_right=1,
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(
                seqs=['abc', '123', '456']))

        self.add_node(MultiDatasetAlphabetNode(params, n_worlds=4))
Ejemplo n.º 5
0
    def create_node_dataset(self):
        """Create a MultiDatasetAlphabetNode fed by strided samples of the alphabet.

        Builds ``seq_count`` sequences of length ``seq_length`` from
        ``self._params.symbols`` (the i-th sequence uses stride ``i``),
        duplicates the whole list once, then repeats each sequence
        ``seq_repeat`` times.

        Returns:
            MultiDatasetAlphabetNode configured with the generated sequences
            and ``n_worlds=self._params.flock_size``.
        """
        def generate_sequence(symbols: str, count: int, skip: int) -> str:
            """Generate sequence of length `count` and stride `skip` from `symbols` - symbols are repeated when needed"""
            repeating_symbols = itertools.cycle(symbols)
            result = []
            for _ in range(count):
                result.append(next(repeating_symbols))
                # Advance the cycle by `skip` extra symbols to realize the stride.
                for _ in range(skip):
                    next(repeating_symbols)
            return ''.join(result)

        # generate_sequence already returns str, no extra conversion needed.
        seqs = [
            generate_sequence(self._params.symbols, self._params.seq_length, i)
            for i in range(self._params.seq_count)
        ]

        # duplicate sequences (each sequence appears twice)
        seqs = seqs * 2
        # make sequences longer (repeat)
        seqs = [s * self._params.seq_repeat for s in seqs]

        dataset_params = DatasetAlphabetParams(
            symbols=self._params.symbols,
            padding_right=1,
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=seqs))
        return MultiDatasetAlphabetNode(dataset_params,
                                        n_worlds=self._params.flock_size)
Ejemplo n.º 6
0
 def test_create_node(self, device):
     """Multi-world unit output shape is prefixed by the number of worlds."""
     params = DatasetAlphabetParams(
         symbols="abcd",
         sequence_probs=DatasetAlphabetSequenceProbsModeParams(seqs=['abcd']))
     unit = MultiDatasetAlphabetUnit(AllocatingCreator(device), params,
                                     n_worlds=self.n_worlds)
     assert list(unit.output_data.shape) == [self.n_worlds, 7, 5]
Ejemplo n.º 7
0
    def toploogy_dataset_test(self):
        """Wire a two-level expert hierarchy on top of an alphabet dataset.

        Topology: dataset -> unsqueeze -> expand -> child expert -> parent
        expert, with the parent's context fed back into the child's temporal
        pooler (a backward connection).  The sequence-id branch is present but
        disabled (commented out).

        NOTE(review): method name has a typo ("toploogy") — kept, since
        renaming would break any external callers.
        """
        dataset_params = DatasetAlphabetParams(
            symbols="abcd123456789",
            padding_right=1,
            sequence_probs=DatasetAlphabetSequenceProbsModeParams(
                seqs=['abc', '123', '456789', '468'],
                # transition_probs=[[0.7, 0.3], [0.3, 0.7], ]
            ))
        dataset_node = DatasetAlphabetNode(dataset_params)
        flock_size = 1
        # Parent cluster-center count was originally tied to the number of
        # sequences; overridden to a fixed 20 here.
        # parent_cluster_centers = len(dataset_params.sequence_probs.seqs)
        parent_cluster_centers = 20  # len(dataset_params.sequence_probs.seqs)

        # Shape-adaptation nodes: add a leading dim, then expand it to flock_size.
        unsqueeze_node_child = UnsqueezeNode(0)
        unsqueeze_node_sequence_id = UnsqueezeNode(0)
        expand_node_child = ExpandNode(0, flock_size)
        expand_node_sequence_id = ExpandNode(0, flock_size)

        # One fewer cluster center than there are symbols — presumably to
        # account for the padding symbol; TODO confirm.
        child_cluster_centers = len(dataset_params.symbols) - 1
        expert_node_child = ExpertFlockNode(
            ExpertParams(flock_size=flock_size,
                         n_cluster_centers=child_cluster_centers,
                         spatial=SpatialPoolerParams(),
                         temporal=TemporalPoolerParams(
                             incoming_context_size=parent_cluster_centers,
                             n_providers=2,
                             n_frequent_seqs=50,
                             seq_length=3,
                             seq_lookahead=1)))

        expert_node_parent = ExpertFlockNode(
            ExpertParams(flock_size=flock_size,
                         n_cluster_centers=parent_cluster_centers,
                         spatial=SpatialPoolerParams(),
                         temporal=TemporalPoolerParams(incoming_context_size=4,
                                                       n_frequent_seqs=50,
                                                       seq_length=3,
                                                       seq_lookahead=1)))

        # Spatial-pooler-only expert for the (disabled) sequence-id branch.
        expert_node_sequence_id = ExpertFlockNode(
            ExpertParams(flock_size=flock_size,
                         n_cluster_centers=2,
                         spatial=SpatialPoolerParams()))

        # Register nodes; the sequence-id branch nodes stay unregistered.
        self.add_node(dataset_node)
        self.add_node(unsqueeze_node_child)
        # self.add_node(unsqueeze_node_sequence_id)
        self.add_node(expand_node_child)
        # self.add_node(expand_node_sequence_id)
        self.add_node(expert_node_child)
        self.add_node(expert_node_parent)
        # self.add_node(expert_node_sequence_id)

        # Dataset output -> child expert input (via shape adapters).
        Connector.connect(dataset_node.outputs.output,
                          unsqueeze_node_child.inputs.input)
        Connector.connect(unsqueeze_node_child.outputs.output,
                          expand_node_child.inputs.input)
        Connector.connect(expand_node_child.outputs.output,
                          expert_node_child.inputs.sp.data_input)
        # Connector.connect(dataset_node.outputs.sequence_id, unsqueeze_node_sequence_id.inputs.input)
        # Connector.connect(unsqueeze_node_sequence_id.outputs.output, expand_node_sequence_id.inputs.input)
        # Connector.connect(expand_node_sequence_id.outputs.output, expert_node_sequence_id.inputs.sp.data_input)

        # Child projections feed the parent's spatial pooler.
        Connector.connect(expert_node_child.outputs.tp.projection_outputs,
                          expert_node_parent.inputs.sp.data_input)
        # Parent context
        Connector.connect(expert_node_parent.outputs.output_context,
                          expert_node_child.inputs.tp.context_input,
                          is_backward=True)