def run(self, detail=False):
    """Train, then predict and record the tag distribution of each batch.

    ``self.train()`` is called once.  Every ``(name, stat)`` pair in
    ``self.stats`` is then preprocessed with ``ST.preprocess`` and entries
    whose processed text is empty are dropped.  Each ``(clf, path)`` pair in
    ``self.classifiers`` predicts the remaining texts; the value returned by
    ``self.cal_tag_dist`` is logged and appended to ``path`` as one
    tab-separated line: ``name``, JSON-encoded distribution, text count.

    Args:
        detail: When true, additionally write one ``"<tag> -<raw text>"``
            line per prediction to ``stats_predict_detail_prefix + name``,
            first deleting any file already present at that path.

    Any ``Exception`` raised while handling a batch (including the
    ``'Predict Results Exception'`` raised when the predictions are empty or
    do not match the number of texts) is reported through ``lexcept`` and
    processing moves on to the next batch.
    """
    self.train()
    for name, stat in self.stats:
        try:
            # Preprocess once per batch, outside the classifier loop, so that
            # every classifier sees the same texts and the raw texts are not
            # replaced by already-processed ones on later iterations.
            raw_stat, stat = ST.preprocess(stat)
            kept = [(raw, new) for raw, new in zip(raw_stat, stat) if new]
            # Real lists (not map objects): len() is taken of them below.
            raw_stat = [raw for raw, _ in kept]
            stat = [new for _, new in kept]

            for clf, path in self.classifiers:
                linfo('----------roundly predict start-----------')
                pred_tags = clf.predict(stat)
                if not pred_tags or len(pred_tags) != len(stat):
                    raise Exception('Predict Results Exception')
                tag2dist = self.cal_tag_dist(pred_tags)
                linfo('%s-roundly online sentiment distribution: %s'
                      % (clf, tag2dist))
                save(path, 'a', '%s\t%s\t%s\n'
                     % (name, json.dumps(tag2dist), len(stat)))
                if detail:
                    detail_path = '%s%s' % (stats_predict_detail_prefix, name)
                    if os.path.exists(detail_path):
                        # os.remove instead of shelling out to ``rm``: no
                        # subprocess, and names containing spaces or shell
                        # metacharacters are handled safely.
                        os.remove(detail_path)
                    for tag, txt in zip(pred_tags, raw_stat):
                        ET.write_file(detail_path, 'a',
                                      '%s -%s\n' % (tag, txt))
                linfo('----------roundly predict end-----------')
        except Exception as e:
            # Best effort: one failing batch must not stop the others.
            lexcept('Unknown exception %s' % e)
def run(self, sample_enabled=False, profile_enabled=False):
    """Train the classifiers, then analyse incoming batches in an endless loop.

    Every classifier in ``self.classifiers`` is trained once.  Each round
    then fetches a batch from ``self.psr.online_run(interval=10)``, splits
    every item into its first element (kept in ``citys``) and its second
    element (the text), preprocesses the texts with ``ST.preprocess`` and
    drops entries whose processed text is empty.  The first classifier
    predicts a tag per text; the share of ``'P'``, ``'N'`` and ``'O'`` tags
    is logged and appended to that classifier's path as a tab-separated
    line: timestamp, JSON-encoded distribution, text count.

    Args:
        sample_enabled: When true, append the JSON dump of at most the first
            300 raw texts of each round to a file named
            ``sample_prefix + 'realtime_' + <timestamp>``.
        profile_enabled: When true, reset the profile before the loop, feed
            each round into ``update_profile_spatial`` and
            ``update_profile_topic``, and on a change of day call
            ``save_profile`` for the previous day followed by
            ``reset_profile``.

    The loop has no exit condition.  Any ``Exception`` raised inside a round
    is reported through ``lexcept`` and the next round starts.
    """
    for csf, _ in self.classifiers:
        csf.train()
    # First 10 characters of the formatted time; presumably the date part
    # (e.g. 'YYYY-MM-DD') -- confirm against ET.format_time.
    action_day = ET.format_time(time.localtime())[:10]
    if profile_enabled:
        self.reset_profile()
    while True:
        try:
            batch = self.psr.online_run(interval=10)
            if not batch:
                continue
            linfo('-------roundly analysis----------')
            # List comprehensions rather than map(): the results are indexed
            # and sliced below, which needs real lists.
            citys = [item[0] for item in batch]
            texts = [item[1] for item in batch]
            raw_stats, stats = ST.preprocess(texts)
            valid_ids = [i for i, txt in enumerate(stats) if txt]
            if not valid_ids:
                # Nothing usable this round; going on would divide by
                # len(stats) == 0 when computing the distribution.
                continue
            stats = [stats[i] for i in valid_ids]
            raw_stats = [raw_stats[i] for i in valid_ids]
            citys = [citys[i] for i in valid_ids]

            if sample_enabled:
                f_t = ET.format_time(time.localtime())
                stamp = f_t.replace(' ', '').replace('-', '').replace(':', '')
                sample_path = '%srealtime_%s' % (sample_prefix, stamp)
                ET.write_file(sample_path, 'a',
                              '%s\n' % json.dumps(raw_stats[:300]))

            # only one model supported at the same time now: the loop is
            # left after the first classifier (see the break below).
            for clf, path in self.classifiers:
                tag2cnt = {'P': 0, 'N': 0, 'O': 0}
                pred_tags = clf.predict(stats)
                for tag in pred_tags:
                    tag2cnt[tag] += 1
                tag2dist = {
                    tag: cnt * 1.0 / len(stats)
                    for tag, cnt in tag2cnt.items()
                }
                linfo('%s-roundly online sentiment distribution: %s'
                      % (clf, tag2dist))
                f_time = ET.format_time(time.localtime())
                today = f_time[:10]
                save(path, 'a', '%s\t%s\t%s\n'
                     % (f_time, json.dumps(tag2dist), len(stats)))
                if profile_enabled:
                    self.update_profile_spatial(citys, pred_tags)
                    self.update_profile_topic(raw_stats, pred_tags)
                    if today != action_day:
                        # Day rolled over: persist the finished day's
                        # profile and start a fresh one.
                        self.save_profile(action_day)
                        self.reset_profile()
                action_day = today
                break
        except Exception as e:
            # Best effort: a failing round must not stop the online loop.
            lexcept('Unknown exception %s' % e)
def format_test(self, emoticon=True, parenthesis=True):
    """Load, preprocess and sparse-format the test set of this classifier.

    The data is read with ``ST.load_data`` from
    ``'../test_data/<classifier_type>_test_data'`` and passed through
    ``ST.preprocess``; the raw and processed texts are stored on
    ``self._raw_test_xs`` and ``self._test_xs``.  Labels are translated
    through ``self.tag2index`` and stored on ``self._test_ys``.  Texts and
    labels are finally handed to ``self.format_sparse`` together with the
    output path.

    Args:
        emoticon: Selects the ``'icon'`` (true) or ``'no_icon'`` (false)
            suffix of the output path.
        parenthesis: Accepted but not used by this method.
    """
    source_path = '../test_data/%s_test_data' % self.classifier_type
    self._test_xs, self._test_ys = ST.load_data(source_path)

    linfo('begin preprocess test data, then sparse')
    self._raw_test_xs, self._test_xs = ST.preprocess(self._test_xs)
    # Disabled steps, kept for reference:
    #ST.replace_url(self._test_xs, fill='H')
    #ST.replace_target(self._test_xs, fill='T')
    self._test_ys = map(lambda label: self.tag2index[label], self._test_ys)

    icon_suffix = 'icon' if emoticon else 'no_icon'
    sparse_path = '%s/test_data/%s%s_sparse_test_data_%s' % (
        project_dir, self.flag_prefix, self.classifier_type, icon_suffix)
    self.format_sparse(self._test_xs, self._test_ys, sparse_path)
def format_test(self, emoticon=True, parenthesis=True):
    """Load, preprocess and sparse-format the test set of this classifier.

    The data is read with ``ST.load_data`` from
    ``'../test_data/<classifier_type>_test_data'`` and passed through
    ``ST.preprocess``; the raw and processed texts are stored on
    ``self._raw_test_xs`` and ``self._test_xs``.  Labels are translated
    through ``self.tag2index`` and stored on ``self._test_ys``.  Texts and
    labels are finally handed to ``self.format_sparse`` together with the
    output path.

    Args:
        emoticon: Selects the ``'icon'`` (true) or ``'no_icon'`` (false)
            suffix of the output path.
        parenthesis: Accepted but not used by this method.
    """
    source_path = '../test_data/%s_test_data' % self.classifier_type
    self._test_xs, self._test_ys = ST.load_data(source_path)

    linfo('begin preprocess test data, then sparse')
    self._raw_test_xs, self._test_xs = ST.preprocess(self._test_xs)
    # Disabled steps, kept for reference:
    #ST.replace_url(self._test_xs, fill='H')
    #ST.replace_target(self._test_xs, fill='T')
    self._test_ys = map(lambda label: self.tag2index[label], self._test_ys)

    icon_suffix = 'icon' if emoticon else 'no_icon'
    sparse_path = '%s/test_data/%s%s_sparse_test_data_%s' % (
        project_dir, self.flag_prefix, self.classifier_type, icon_suffix)
    self.format_sparse(self._test_xs, self._test_ys, sparse_path)
def run(self, sample_enabled=False, profile_enabled=False):
    """Train the classifiers, then analyse incoming batches in an endless loop.

    Every classifier in ``self.classifiers`` is trained once.  Each round
    then fetches a batch from ``self.psr.online_run(interval=10)``, splits
    every item into its first element (kept in ``citys``) and its second
    element (the text), preprocesses the texts with ``ST.preprocess`` and
    drops entries whose processed text is empty.  The first classifier
    predicts a tag per text; the share of ``'P'``, ``'N'`` and ``'O'`` tags
    is logged and appended to that classifier's path as a tab-separated
    line: timestamp, JSON-encoded distribution, text count.

    Args:
        sample_enabled: When true, append the JSON dump of at most the first
            300 raw texts of each round to a file named
            ``sample_prefix + 'realtime_' + <timestamp>``.
        profile_enabled: When true, reset the profile before the loop, feed
            each round into ``update_profile_spatial`` and
            ``update_profile_topic``, and on a change of day call
            ``save_profile`` for the previous day followed by
            ``reset_profile``.

    The loop has no exit condition.  Any ``Exception`` raised inside a round
    is reported through ``lexcept`` and the next round starts.
    """
    for csf, _ in self.classifiers:
        csf.train()
    # First 10 characters of the formatted time; presumably the date part
    # (e.g. 'YYYY-MM-DD') -- confirm against ET.format_time.
    action_day = ET.format_time(time.localtime())[:10]
    if profile_enabled:
        self.reset_profile()
    while True:
        try:
            batch = self.psr.online_run(interval=10)
            if not batch:
                continue
            linfo('-------roundly analysis----------')
            # List comprehensions rather than map(): the results are indexed
            # and sliced below, which needs real lists.
            citys = [item[0] for item in batch]
            texts = [item[1] for item in batch]
            raw_stats, stats = ST.preprocess(texts)
            valid_ids = [i for i, txt in enumerate(stats) if txt]
            if not valid_ids:
                # Nothing usable this round; going on would divide by
                # len(stats) == 0 when computing the distribution.
                continue
            stats = [stats[i] for i in valid_ids]
            raw_stats = [raw_stats[i] for i in valid_ids]
            citys = [citys[i] for i in valid_ids]

            if sample_enabled:
                f_t = ET.format_time(time.localtime())
                stamp = f_t.replace(' ', '').replace('-', '').replace(':', '')
                sample_path = '%srealtime_%s' % (sample_prefix, stamp)
                ET.write_file(sample_path, 'a',
                              '%s\n' % json.dumps(raw_stats[:300]))

            # only one model supported at the same time now: the loop is
            # left after the first classifier (see the break below).
            for clf, path in self.classifiers:
                tag2cnt = {'P': 0, 'N': 0, 'O': 0}
                pred_tags = clf.predict(stats)
                for tag in pred_tags:
                    tag2cnt[tag] += 1
                tag2dist = {
                    tag: cnt * 1.0 / len(stats)
                    for tag, cnt in tag2cnt.items()
                }
                linfo('%s-roundly online sentiment distribution: %s'
                      % (clf, tag2dist))
                f_time = ET.format_time(time.localtime())
                today = f_time[:10]
                save(path, 'a', '%s\t%s\t%s\n'
                     % (f_time, json.dumps(tag2dist), len(stats)))
                if profile_enabled:
                    self.update_profile_spatial(citys, pred_tags)
                    self.update_profile_topic(raw_stats, pred_tags)
                    if today != action_day:
                        # Day rolled over: persist the finished day's
                        # profile and start a fresh one.
                        self.save_profile(action_day)
                        self.reset_profile()
                action_day = today
                break
        except Exception as e:
            # Best effort: a failing round must not stop the online loop.
            lexcept('Unknown exception %s' % e)