Esempio n. 1
0
    def test_build_singleton_2(self):
        '''
        Test order.

        Calls build_singleton() on three updates with view='order' and checks
        the returned tuple: the time passed in, the update itself, a
        two-element set, a dict mapping those two elements to 0 and 1, and
        the string '1_0_1_1'.
        '''
        method = sPENminer(None, window_size=3, max_size=2, view='order')
        updates = [
            ('1', '1', '2', '1', '1', '2', '1'),
            ('1', '3', '1', '1', '3', '1', '1'),
            ('1', '3', '4', '1', '3', '4', '1'),
        ]
        # One (set, dict) pair per update, in the same order as `updates`.
        expected = [
            ({'1', '2'}, {'1': 0, '2': 1}),
            ({'3', '1'}, {'3': 0, '1': 1}),
            ({'3', '4'}, {'3': 0, '4': 1}),
        ]

        checks = zip(updates, expected)
        for t, (update, (members, ranks)) in enumerate(checks, start=1):
            method.time += 1
            singleton = method.extractor.build_singleton(update, t)
            assert singleton == (t, update, members, ranks, '1_0_1_1')
Esempio n. 2
0
def main(args):
    '''
    Build the miner selected by the parsed arguments and run it.

    sPENminer is constructed unless args.offline is set, in which case
    oPENminer is constructed instead; only oPENminer receives save_occs.
    '''
    stream = Stream(args.stream, delimiter=args.delimiter)
    print('Using view \"{}\"'.format(args.view))

    online = not args.offline
    if online and args.save_occs:
        print(
            '\'save_occs = True\' is only an option for offline verions. Occurrences will not be saved'
        )

    miner_cls = sPENminer if online else oPENminer
    # Keyword arguments passed to whichever class was chosen.
    options = {
        'window_size': args.window_size,
        'max_size': args.max_size,
        'view': args.view,
        'alpha': args.alpha,
        'beta': args.beta,
        'gamma': args.gamma,
        'save_output': args.save_output,
    }
    if not online:
        options['save_occs'] = args.save_occs

    method = miner_cls(stream, **options)
    method.mine(verbose=args.verbose)
Esempio n. 3
0
 def test_create_size_2_snippets_6(self):
     '''
     Check singletons and compatability_links after each of four updates
     whose last fields are 1, 2, 3 and 5.
     '''
     method = sPENminer(None, window_size=3, max_size=3)
     updates = [('1', '1', '2', '1', '1', '2', '1', 1),
                ('1', '3', '1', '1', '3', '1', '1', 2),
                ('1', '5', '4', '1', '5', '4', '1', 3),
                ('1', '3', '1', '1', '3', '1', '1', 5)]
     # Per update: the expected length of both singletons and
     # compatability_links, followed by the expected link lists read
     # backwards from the end ([-1], [-2], ...).
     expectations = [
         (1, [[]]),
         (2, [[1]]),
         (3, [[], [1]]),
         (3, [[2], [], [1]]),
     ]
     method.old_freq_of_current = dict()  # needed for book keeping
     method.freq_of_current = dict()  # needed for book keeping
     for update, (size, tail) in zip(updates, expectations):
         method.time = update[-1]
         method.extractor.create_singleton(update)
         method.extractor.create_size_2_snippets(update)
         singletons = method.extractor.singletons
         links = method.extractor.compatability_links
         assert len(singletons) == len(links) == size
         for back, expected_links in enumerate(tail, start=1):
             assert links[-back] == expected_links
Esempio n. 4
0
    def test_P_3(self):
        '''
        Feed the same update with last field 1, 2 and 3 to both miners and
        compare their values for pattern '1_1_2_1' against self.P.
        '''
        pattern = '1_1_2_1'
        method = sPENminer(None, window_size=3, max_size=1)
        method_offline = oPENminer(None, window_size=3, max_size=1)
        updates = [('1', '1', '2', '1', '1', '2', '1', t) for t in (1, 2, 3)]

        for update in updates:
            method.process_update(update)
            method_offline.process_update(update)
        method_offline.compute_persistence()

        _P = self.P([1, 2, 3], interval_width=2)
        assert _P == method_offline.Ps[pattern]
        # old_Ps is read once and compared on both sides, mirroring a
        # chained comparison.
        streamed = method.old_Ps.get(pattern)
        assert _P == streamed
        assert streamed == method.query(pattern)
Esempio n. 5
0
 def test_P_5(self):
     '''
     Compare sPENminer, oPENminer and self.P on a seeded random stream.

     After every update, old_Ps and query() must agree for the pattern of
     the newest singleton. Once the stream is consumed, the oPENminer value
     of each recorded pattern must match both the reference value from
     self.P and sPENminer's query().
     '''
     method = sPENminer(None, window_size=3, max_size=2)
     method_offline = oPENminer(None, window_size=3, max_size=1)
     random.seed(0)
     updates, interval_width = self.random_stream(max_node=10)
     pattern_to_occs = defaultdict(list)
     for update in updates:
         method.process_update(update)
         method_offline.process_update(update)
         # Last field of the newest singleton is used as the pattern key.
         pattern = method.extractor.singletons[-1][-1]
         pattern_to_occs[pattern].append(update[-1])
         assert(abs(method.old_Ps.get(pattern) - method.query(pattern)) < 0.000000001)
     method_offline.compute_persistence()
     for pattern, occs in pattern_to_occs.items():
         _P = self.P(occs, interval_width)
         # The reference value used to be computed and then discarded, so
         # the test never compared against it; check it here.
         assert(abs(_P - method_offline.Ps[pattern]) < 0.00000001)
         assert(abs(method_offline.Ps[pattern] - method.query(pattern)) < 0.00000001)
Esempio n. 6
0
    def test_build_snippet_from_1(self):
        '''
        Test order.

        After three updates are processed with view='order', num_occs must
        hold exactly 1 for the key '1_0_1_1|1_2_0_1|1_2_3_1'.
        '''
        method = sPENminer(None, window_size=3, max_size=3, view='order')
        updates = [
            ('1', '1', '2', '1', '1', '2', '1', 1),
            ('1', '3', '1', '1', '3', '1', '1', 2),
            ('1', '3', '4', '1', '3', '4', '1', 3),
        ]

        for update in updates:
            # The last field of each update is used as the current time.
            method.time = update[-1]
            method.process_update(update)

        occurrences = method.num_occs['1_0_1_1|1_2_0_1|1_2_3_1']
        assert occurrences == 1
Esempio n. 7
0
    def test_P_4(self):
        '''
        Test that persistence is correctly computed when the update occurs not every second.
        '''
        pattern = '1_1_2_1'
        method = sPENminer(None, window_size=3, max_size=1)
        method_offline = oPENminer(None, window_size=3, max_size=1)

        repeated = ('1', '1', '2', '1', '1', '2', '1')
        other = ('1', '3', '4', '1', '3', '4', '1')
        # The repeated update appears with last field 1, 2 and 5; a
        # different update takes 3 and nothing carries 4.
        updates = [repeated + (1,), repeated + (2,), other + (3,),
                   repeated + (5,)]

        for update in updates:
            method.process_update(update)
            method_offline.process_update(update)
        method_offline.compute_persistence()

        first, last = 1, 5
        _P = self.P([1, 2, 5], interval_width=last - first)
        assert _P == method_offline.Ps[pattern]
        assert _P == method.old_Ps.get(pattern)
Esempio n. 8
0
def main(args):
    '''
    Build the miner selected by the parsed arguments and run it.

    Selection order: args.anomaly -> sPENminerAnomaly, args.data_stream ->
    MethodDataStream, otherwise sPENminer unless args.offline is set, in
    which case oPENminer (the only class that receives save_occs).
    '''
    stream = Stream(args.stream, delimiter=args.delimiter)
    print('Using view \"{}\"'.format(args.view))

    if args.anomaly:
        miner_cls = sPENminerAnomaly
        options = {
            'window_size': args.window_size,
            'max_size': args.max_size,
            'view': args.view,
            'alpha': args.alpha,
            'beta': args.beta,
            'gamma': args.gamma,
            'data_stream': args.data_stream,
            'freq': args.freq,
            'num_trees': args.num_trees,
            'max_depth': args.max_depth,
            'seed': args.seed,
        }
    elif args.data_stream:
        miner_cls = MethodDataStream
        options = {
            'window_size': args.window_size,
            'max_size': args.max_size,
            'view': args.view,
            'save_output': args.save_output,
        }
    else:
        online = not args.offline
        if online and args.save_occs:
            print('\'save_occs = True\' is only an option for offline verions. Occurrences will not be saved')
        miner_cls = sPENminer if online else oPENminer
        options = {
            'window_size': args.window_size,
            'max_size': args.max_size,
            'view': args.view,
            'alpha': args.alpha,
            'beta': args.beta,
            'gamma': args.gamma,
            'save_output': args.save_output,
        }
        if not online:
            options['save_occs'] = args.save_occs

    # Single construction site for whichever class was chosen above.
    method = miner_cls(stream, **options)
    method.mine(verbose=args.verbose)
Esempio n. 9
0
 def test_create_singleton_1(self):
     '''
     Test that create_singleton() correctly creates a singleton snippet.
     '''
     method = sPENminer(None, window_size=3, max_size=3)
     updates = [('1', '1', '2', '1', '1', '2', '1', 1),
                ('1', '3', '4', '1', '3', '4', '1', 2)]
     method.old_freq_of_current = dict()  # needed for book keeping
     method.freq_of_current = dict()  # needed for book keeping
     for count, update in enumerate(updates, start=1):
         method.time = int(update[-1])
         if method.ts == 0:
             method.ts = method.time
         method.extractor.create_singleton(update)
         singletons = method.extractor.singletons
         # The k-th update (1-based) has last field k, so the expected
         # list length and the expected first field of the newest
         # singleton are the same number.
         assert len(singletons) == count
         assert singletons[-1][0] == count
Esempio n. 10
0
 def test_P_11(self):
     '''
     Test with beta > 1.
     '''
     beta = 3
     step_tol = 1e-9   # old_Ps vs query() after every update
     final_tol = 1e-8  # oPENminer vs sPENminer at the end of the stream
     for num_updates in (1000, 2000):
         method = sPENminer(None, window_size=3, max_size=3, beta=beta)
         method_offline = oPENminer(None, window_size=3, max_size=3,
                                    beta=beta)
         random.seed(3)
         updates, interval_width = self.random_stream(
             max_node=20, num_updates=num_updates)
         pattern_to_occs = defaultdict(list)
         for update in updates:
             method.process_update(update)
             method_offline.process_update(update)
             # Last field of the newest singleton is used as the key.
             pattern = method.extractor.singletons[-1][-1]
             pattern_to_occs[pattern].append(update[-1])
             drift = abs(method.old_Ps.get(pattern) - method.query(pattern))
             assert drift < step_tol
         method_offline.compute_persistence()
         for pattern, occs in pattern_to_occs.items():
             _P = self.P(occs, interval_width, beta=beta)
             offline_value = method_offline.Ps[pattern]
             assert _P == offline_value
             assert abs(offline_value - method.query(pattern)) < final_tol
Esempio n. 11
0
 def test_create_size_3_snippets_1(self):
     '''
     Run create_singleton(), create_size_2_snippets() and
     create_size_3_snippets() for three updates with view='id', checking
     the number of keys in first_occs after each one and the
     compatability_links after the last.
     '''
     method = sPENminer(None, window_size=3, max_size=3, view='id')
     updates = [('1', '1', '2', '1', '1', '2', '1', 1),
                ('1', '3', '1', '1', '3', '1', '1', 2),
                ('1', '5', '1', '1', '5', '1', '1', 3)]
     # Expected number of keys in first_occs after each update.
     expected_key_counts = (1, 3, 7)
     method.old_freq_of_current = dict()  # needed for book keeping
     method.freq_of_current = dict()  # needed for book keeping
     for step, update in enumerate(updates):
         method.extractor.create_singleton(update)
         method.extractor.create_size_2_snippets(update)
         method.extractor.create_size_3_snippets()
         if step == 2:
             links = method.extractor.compatability_links
             assert set(links[-1]) == {1, 2}
             assert set(links[-2]) == {1}
             assert set(links[-3]) == set()
             assert '1_1_2_1|1_3_1_1|1_5_1_1' in method.first_occs
         key_count = len(list(method.first_occs.keys()))
         assert key_count == expected_key_counts[step]