(1 - a) * (self.stats.get(term, (0, 0))[1] + a * delta ** 2)) def find_emerging_terms(self): with open(os.path.join(self.result_dir,'burst_terms'), 'a', encoding = 'utf-8') as f: f.write('Interval %d:\n' % self.ii) for term, count in self.curr_term_count.items(): stats = self.stats.get(term,(0,0)) ewma = stats[0] ewmvar = stats[1] ewma = max(ewma, self.beta) ratio = (count / self.curr_tweets_count - ewma) / (math.sqrt(ewmvar) + self.beta) #THE FORMULA if ratio > self.s: f.write('%s %f\r\n' % (term, ratio)) #\r\n under windows f.write('\n') f.flush() if __name__ == '__main__': ts = TwitterStream() #datapath = '/Users/Adward/Github/Automatic-Rumor-Detection/TwitterEventDetection/TestData/original' datapath = '/Volumes/Adward_Backup/SRTP/data' dirlist = os.listdir(datapath) for path in dirlist: if path.startswith('201') and os.path.isdir(os.path.join(datapath,path)): ts.source(os.path.join(datapath,path)) ts.sort() temp = TemporalProcessor(3600,0.3,0.0002,8, #result_dir='/Users/Adward/Github/Automatic-Rumor-Detection/TwitterEventDetection/TestData/serialized') result_dir = '/Volumes/Adward_Backup/SRTP/serialized_test') ed = EventDetector(TextProcessor(),temp,ts.generator()) ed.process_stream()