コード例 #1
0
    def test_fp_rate(self):
        """With ``fp_rate=0.5``, under 60% of positive examples hold the pattern.

        "Positive" here means an example whose second element is truthy.
        NOTE(review): ``fp_rate`` presumably controls how often a positive
        label is emitted without the pattern being inserted -- confirm
        against ``ExamplesGenerator``.
        """
        pattern = [(10, 123), (10, 456), (10, 789)]
        eg = ExamplesGenerator(seed=7357, pattern=pattern, fp_rate=0.5)

        examples = [next(eg()) for _ in range(1000)]
        positive_examples = [e[0] for e in examples if e[1]]
        has_pattern = [eg.has_pattern(e, pattern) for e in positive_examples]
        # assertLess reports both operands on failure, whereas
        # assertTrue(a < b) would only say "False is not true".
        self.assertLess(sum(has_pattern), 0.6 * len(positive_examples))
コード例 #2
0
    def test_has_pattern(self):
        """``has_pattern`` rejects a plain range and accepts a matching sequence."""
        eg, _ = self._get_eg()
        # A bare 0..99 range must not be reported as containing the pattern.
        self.assertFalse(eg.has_pattern(list(range(100)), eg.pattern))

        # Test the recursive find: the tokens 123 and 456 are duplicated in
        # the example, yet the pattern must still be detected.
        eg = ExamplesGenerator(seed=7357, pattern=[(3, 123), (3, 456), (3, 789)])
        example = [1, 123, 123, 1, 456, 456, 1, 1, 789]
        self.assertTrue(eg.has_pattern(example, eg.pattern))
コード例 #3
0
 def test_multiple_patterns(self):
     """Both configured patterns occur, the first over 3x as often as the second.

     NOTE(review): the second element of ``multiple_patterns`` ([0.8, 0.2])
     presumably gives the relative sampling weight of each pattern --
     confirm against ``ExamplesGenerator``.
     """
     eg = ExamplesGenerator(seed=7357, multiple_patterns=[
         [
             [(10, 123), (10, 456), (10, 789)],
             [(10, 789), (10, 456), (10, 123)]
         ],
         [0.8, 0.2]
     ])
     examples = [next(eg()) for _ in range(10000)]
     has_pattern_0 = [eg.has_pattern(e[0], eg.all_patterns[0]) for e in examples]
     has_pattern_1 = [eg.has_pattern(e[0], eg.all_patterns[1]) for e in examples]
     count_0 = sum(has_pattern_0)
     count_1 = sum(has_pattern_1)
     # assertGreater shows the actual counts when the check fails, which
     # assertTrue(a > b) does not.
     self.assertGreater(count_0 + count_1, 4000)
     self.assertGreater(count_0, count_1 * 3)
コード例 #4
0
from prefixspan import PrefixSpan

from data_sources.data_generator import ExamplesGenerator, get_multiple_patterns

# --- Configuration -----------------------------------------------------
# Settings passed to the synthetic example generator.
VOCAB_SIZE = 1000
SEQ_LEN = 250

# Settings for the frequent-sequence mining step below.
NUM_EXAMPLES = 200  # number of examples drawn from the generator
MIN_FREQ = 25       # threshold passed to PrefixSpan.frequent
MIN_LEN = 5         # mined sequences shorter than this are dropped
MIN_DIST = 3        # not used in this part of the script

multiple_patterns = get_multiple_patterns(10)

# --- Data generation ---------------------------------------------------
data_generator = ExamplesGenerator(
    seq_len=SEQ_LEN,
    vocab_size=VOCAB_SIZE,
    seed=111,
    multiple_patterns=multiple_patterns,
)

# Each drawn item is indexed as item[0] = sequence, item[1] = label (1 or 0).
data_sequences = [next(data_generator()) for _ in range(NUM_EXAMPLES)]
positive_sequences = [item[0] for item in data_sequences if item[1] == 1]
negative_sequences = [item[0] for item in data_sequences if item[1] == 0]

# --- Mining ------------------------------------------------------------
# Entries returned by frequent() are indexed as found[0] (the sort key,
# presumably the frequency) and found[1] (the mined sequence).
positive_seq = PrefixSpan(positive_sequences).frequent(MIN_FREQ)
long_seq = [found for found in positive_seq if len(found[1]) >= MIN_LEN]
seq_by_freq = sorted(long_seq, key=lambda found: found[0], reverse=True)


def distance_from_seqs(s, s_list: list):
    """return distance (in terms of number of different tokens) between the sequence s
    and the list of sequence s_list"""
    if not s_list:
コード例 #5
0
 def _get_eg(self):
     """Return ``(generator, example)`` where the example has the pattern inserted.

     The example is ``list(range(100))`` passed through ``insert_pattern``;
     the helper asserts that ``has_pattern`` detects it before returning.
     """
     generator = ExamplesGenerator(seed=7357, pattern=[(10, 123), (10, 456), (10, 789)])
     base_sequence = list(range(100))
     seeded = generator.insert_pattern(base_sequence, generator.pattern)
     # Sanity check so callers can rely on the returned example.
     self.assertTrue(generator.has_pattern(seeded, generator.pattern))
     return generator, seeded
コード例 #6
0
 def test_init(self):
     """Smoke test: the generator can be constructed with no arguments."""
     # The instance itself is not needed; only construction must succeed.
     ExamplesGenerator()
コード例 #7
0
 def test_proportions(self):
     """With seed 7357, exactly 4953 of 10000 drawn examples contain the pattern."""
     generator = ExamplesGenerator(seed=7357, pattern=[(10, 123), (10, 456), (10, 789)])
     # Draw every example first, then count, so the draw order is unchanged.
     drawn = [next(generator()) for _ in range(10000)]
     hits = sum(
         generator.has_pattern(sample[0], generator.pattern) for sample in drawn
     )
     self.assertEqual(hits, 4953)
コード例 #8
0
 def test_pattern_indices(self):
     """``find_pattern_indices`` locates the inserted pattern at [8, 14, 18]."""
     expected_indices = [8, 14, 18]
     generator = ExamplesGenerator(seed=7357, pattern=[(10, 123), (10, 456), (10, 789)])
     seeded = generator.insert_pattern(list(range(100)), generator.pattern)
     found_indices = generator.find_pattern_indices(seeded, generator.pattern)
     self.assertEqual(found_indices, expected_indices)
コード例 #9
0
 def test_insert_pattern(self):
     """``insert_pattern`` puts tokens 123 and 456 at positions 4 and 5."""
     generator = ExamplesGenerator(seq_len=10, seed=7357, pattern=[(5, 123), (2, 456)])
     base_sequence = list(range(10))
     with_pattern = generator.insert_pattern(base_sequence, generator.pattern)
     self.assertEqual(with_pattern[4:6], [123, 456])
コード例 #10
0
 def test_call_seed(self):
     """Check how the seed set at init behaves when the generator is called later.

     Pins the first three tokens of the first example drawn with seed 456.
     """
     generator = ExamplesGenerator(seq_len=10, seed=456)
     sequences = [next(generator())[0] for _ in range(10)]
     first_three = list(sequences[0][:3])
     self.assertEqual(first_three, [27, 43, 89])