def detect_events(self, index, doc_field, max_perc_words_by_topic, logger, time_slice_length,
                  k=10, rel_words_per_event=5, theta=0.6, sigma=0.5):
    """Build a corpus from vocabulary-filtered tweets and run both MABED phases.

    The tweets are fetched with ``self.get_vocabulary_tweets``, wrapped in a
    ``TobasCorpus`` (stored on ``self.corpus``) and discretized into
    time-slices. Phase 1 is delegated to a fresh ``MABED`` instance; phase 2
    is run through ``self.phase2`` on the phase-1 output.

    Args:
        index: Forwarded unchanged to ``self.get_vocabulary_tweets``.
        doc_field: Forwarded unchanged to ``self.get_vocabulary_tweets``.
        max_perc_words_by_topic: Forwarded unchanged to
            ``self.get_vocabulary_tweets``.
        logger: Passed to ``corpus.discretize`` and to the ``MABED``
            constructor.
        time_slice_length: Time-slice length handed to ``corpus.discretize``.
        k: Accepted for interface compatibility; not used in this method.
        rel_words_per_event: Stored on ``self`` as both
            ``rel_words_per_event`` and ``p``.
        theta: Stored on ``self.theta``.
        sigma: Stored on ``self.sigma``.

    Returns:
        Whatever ``self.phase2`` returns for the phase-1 events.
    """
    print("Getting the vocabulary-based tweets")
    tweets = self.get_vocabulary_tweets(index, doc_field, max_perc_words_by_topic)

    print("Setting the corpus")
    # Original author's note on the tweets: "text timestamp_ms (must be a date object)".
    self.corpus = TobasCorpus(tweets=tweets)

    print("Discretizing the corpus")
    self.corpus.discretize(time_slice_length, logger=logger)

    print("Running MABED phase 1")
    detector = MABED(self.corpus, logger)
    # ``p`` duplicates ``rel_words_per_event`` because some inherited methods
    # need it under that name.
    self.rel_words_per_event = self.p = rel_words_per_event
    self.theta, self.sigma = theta, sigma
    candidate_events = detector.phase1()

    print("Running MABED phase 2")
    events = self.phase2(candidate_events)
    print("Events", events)
    return events
def detect_filtered_events(self, index="test3", k=10, maf=10, mrf=0.4, tsl=30, p=10, theta=0.6, sigma=0.6,
                           session=False, filter=False, cluster=2):
    """Load a corpus for ``index``, discretize it and run MABED on it.

    Progress and wall-clock timings for each stage are printed to stdout.

    Args:
        index: Index name, passed to ``Corpus`` as ``index=``.
        k: Passed to ``MABED.run`` as ``k``.
        maf: Minimum absolute word frequency (as labelled in the printed
            parameter summary); passed to ``Corpus``.
        mrf: Maximum relative word frequency (as labelled in the printed
            parameter summary); passed to ``Corpus``.
        tsl: Time-slice length; the progress message reports it in minutes.
        p: Passed to ``MABED.run`` as ``p``.
        theta: Passed to ``MABED.run`` as ``theta``.
        sigma: Passed to ``MABED.run`` as ``sigma``.
        session: Passed to ``Corpus`` as ``session=``.
        filter: Passed to ``Corpus`` as ``filter=``.
        cluster: Second positional argument to ``Corpus.discretize``.

    Returns:
        ``False`` when the loaded corpus has no tweets, otherwise the
        ``MABED`` instance after ``run`` has completed.
    """
    stopwords_path = 'stopwords/twitter_all.txt'
    separator = '\t'

    print('Parameters:')
    print(
        ' Index: %s\n k: %d\n Stop-words: %s\n Min. abs. word frequency: %d\n Max. rel. word frequency: %f'
        % (index, k, stopwords_path, maf, mrf))
    print(' p: %d\n theta: %f\n sigma: %f' % (p, theta, sigma))

    # Stage 1: load the corpus.
    print('Loading corpus...')
    started = timeit.default_timer()
    corpus = Corpus(stopwords_path, maf, mrf, separator, index=index, session=session, filter=filter)
    if not corpus.tweets:
        # Nothing to analyse; callers receive False instead of a MABED object.
        return False
    print('Corpus loaded in %f seconds.' % (timeit.default_timer() - started))

    # Stage 2: partition the tweets into time-slices.
    print('Partitioning tweets into %d-minute time-slices...' % tsl)
    started = timeit.default_timer()
    corpus.discretize(tsl, cluster)
    print('Partitioning done in %f seconds.' % (timeit.default_timer() - started))

    # Stage 3: event detection.
    print('Running MABED...')
    started = timeit.default_timer()
    detector = MABED(corpus)
    detector.run(k=k, p=p, theta=theta, sigma=sigma)
    print('Event detection performed in %f seconds.' % (timeit.default_timer() - started))
    return detector
# Load the corpus from the command-line arguments and report how long it took.
start_time = timeit.default_timer()
my_corpus = Corpus(args.i, args.sw, args.maf, args.mrf)
elapsed = timeit.default_timer() - start_time
print('Corpus loaded in %f seconds.' % elapsed)

# Partition the corpus into time-slices of length args.tsl (the message
# reports the value in minutes) and time the operation.
time_slice_length = args.tsl
print('Partitioning tweets into %d-minute time-slices...' % time_slice_length)
start_time = timeit.default_timer()
my_corpus.discretize(time_slice_length)
elapsed = timeit.default_timer() - start_time
print('Partitioning done in %f seconds.' % elapsed)

# Run MABED with the parameters taken from the parsed arguments
# (args.t -> theta, args.s -> sigma).
print('Running MABED...')
k = args.k
p = args.p
theta = args.t
sigma = args.s
start_time = timeit.default_timer()
mabed = MABED(my_corpus)
mabed.run(k=k, p=p, theta=theta, sigma=sigma)
mabed.print_events()
mabed.prepare_csv(my_corpus.save_start_date, time_slice_length)
mabed.print_anomalies()
# NOTE: the elapsed time below covers run() as well as the print_events /
# prepare_csv / print_anomalies calls above, not the detection step alone.
elapsed = timeit.default_timer() - start_time
print('Event detection performed in %f seconds.' % elapsed)

# Persist the events only when an output path (args.o) was supplied.
if args.o is not None:
    utils.save_events(mabed, args.o)
    print('Events saved in %s' % args.o)
# Echo the remaining detection parameters (args.t is theta, args.s is sigma).
print(' p: %d\n theta: %f\n sigma: %f' % (args.p, args.t, args.s))

# Load the corpus from the command-line arguments and report how long it took.
print('Loading corpus...')
start_time = timeit.default_timer()
my_corpus = Corpus(args.i, args.sw, args.maf, args.mrf, args.sep)
elapsed = timeit.default_timer() - start_time
print('Corpus loaded in %f seconds.' % elapsed)

# Partition the corpus into time-slices of length args.tsl (the message
# reports the value in minutes) and time the operation.
time_slice_length = args.tsl
print('Partitioning tweets into %d-minute time-slices...' % time_slice_length)
start_time = timeit.default_timer()
my_corpus.discretize(time_slice_length)
elapsed = timeit.default_timer() - start_time
print('Partitioning done in %f seconds.' % elapsed)

# Run MABED with the parameters taken from the parsed arguments.
print('Running MABED...')
k = args.k
p = args.p
theta = args.t
sigma = args.s
start_time = timeit.default_timer()
mabed = MABED(my_corpus)
mabed.run(k=k, p=p, theta=theta, sigma=sigma)
mabed.print_events()
# NOTE: the elapsed time below covers both run() and print_events().
elapsed = timeit.default_timer() - start_time
print('Event detection performed in %f seconds.' % elapsed)

# Persist the events only when an output path (args.o) was supplied.
if args.o is not None:
    utils.save_events(mabed, args.o)
    print('Events saved in %s' % args.o)