Example #1
0
 def run(self, detail=False):
     """Train, then predict and record a tag distribution for each batch.

     Each ``(name, stat)`` pair in ``self.stats`` is preprocessed once with
     ``ST.preprocess``; entries whose preprocessed form is falsy are
     dropped.  Every ``(clf, path)`` pair in ``self.classifiers`` then
     predicts on the remaining texts and one
     ``name<TAB>json(distribution)<TAB>count`` line is passed to
     ``save(path, 'a', ...)``.

     Args:
         detail: when true, additionally delete any existing file at
             ``stats_predict_detail_prefix + name`` and write one
             ``"<tag> -<raw text>"`` line per prediction to it.

     An exception raised while handling one batch is logged with
     ``lexcept`` and processing continues with the next batch.
     """
     self.train()
     for name, stat in self.stats:
         try:
             # Preprocess once per batch, outside the classifier loop and
             # under new names: rebinding `stat` inside the loop made every
             # classifier after the first preprocess already-preprocessed
             # text.
             raw_texts, clean_texts = ST.preprocess(stat)
             kept = [(raw, new)
                     for raw, new in zip(raw_texts, clean_texts) if new]
             # Real lists (not map objects) so len() below also works on
             # Python 3.
             raw_texts = [raw for raw, _ in kept]
             clean_texts = [new for _, new in kept]
             for clf, path in self.classifiers:
                 linfo('----------roundly predict start-----------')
                 pred_tags = clf.predict(clean_texts)
                 if not pred_tags or len(pred_tags) != len(clean_texts):
                     raise Exception('Predict Results Exception')
                 tag2dist = self.cal_tag_dist(pred_tags)
                 linfo('%s-roundly online sentiment distribution: %s'
                       % (clf, tag2dist))
                 save(path, 'a', '%s\t%s\t%s\n'
                      % (name, json.dumps(tag2dist), len(clean_texts)))
                 if detail:
                     detail_path = '%s%s' % (stats_predict_detail_prefix, name)
                     # os.remove avoids passing a data-derived path through
                     # a shell command line.
                     if os.path.exists(detail_path):
                         os.remove(detail_path)
                     for tag, txt in zip(pred_tags, raw_texts):
                         ET.write_file(detail_path, 'a',
                                       '%s -%s\n' % (tag, txt))
                 linfo('----------roundly predict end-----------')
         except Exception as e:
             lexcept('Unknown exception %s' % e)
    def run(self, sample_enabled=False, profile_enabled=False):
        """Train the classifiers, then classify online batches in an endless loop.

        Each round fetches a batch from ``self.psr.online_run(interval=10)``,
        splits every item into ``item[0]`` (kept as ``citys``) and
        ``item[1]`` (the text handed to ``ST.preprocess``), drops entries
        whose preprocessed text is falsy, predicts with the first pair in
        ``self.classifiers`` and passes a
        ``time<TAB>json(distribution)<TAB>count`` line to
        ``save(path, 'a', ...)``.

        Args:
            sample_enabled: when true, write the first 300 raw texts of the
                round as a JSON line to a timestamped file under
                ``sample_prefix``.
            profile_enabled: when true, reset the profile before the loop,
                feed each round into ``update_profile_spatial`` /
                ``update_profile_topic`` and, once the date string changes,
                call ``save_profile`` for the previous day followed by
                ``reset_profile``.

        This method does not return.  Any exception raised inside a round
        is logged with ``lexcept`` and the loop continues.
        """
        for csf, _ in self.classifiers:
            csf.train()
        action_day = ET.format_time(time.localtime())[:10]
        if profile_enabled:
            self.reset_profile()
        while True:
            try:
                batch = self.psr.online_run(interval=10)
                if not batch:
                    continue

                linfo('-------roundly analysis----------')
                # List comprehensions instead of map(): the results are
                # indexed and measured below, which map objects do not
                # support on Python 3.
                citys = [item[0] for item in batch]
                raw_stats, stats = ST.preprocess([item[1] for item in batch])
                valid_ids = [i for i, txt in enumerate(stats) if txt]
                stats = [stats[i] for i in valid_ids]
                raw_stats = [raw_stats[i] for i in valid_ids]
                citys = [citys[i] for i in valid_ids]
                if sample_enabled:
                    f_t = ET.format_time(time.localtime())
                    sample_path = '%srealtime_%s' % (
                        sample_prefix, f_t.replace(' ', '').replace(
                            '-', '').replace(':', ''))
                    ET.write_file(sample_path, 'a',
                                  '%s\n' % json.dumps(raw_stats[:300]))

                if not stats:
                    # Every text was filtered out; computing the
                    # distribution would divide by zero.
                    linfo('no valid text after preprocess, skip this round')
                    continue

                #only one model supported at the same time now.
                for clf, path in self.classifiers:
                    tag2cnt = {'P': 0, 'N': 0, 'O': 0}
                    pred_tags = clf.predict(stats)
                    for tag in pred_tags:
                        tag2cnt[tag] += 1
                    total = len(stats)
                    tag2dist = {
                        tag: cnt * 1.0 / total
                        for tag, cnt in tag2cnt.items()
                    }
                    linfo('%s-roundly online sentiment distribution: %s' %
                          (clf, tag2dist))
                    f_time = ET.format_time(time.localtime())
                    today = f_time[:10]
                    save(
                        path, 'a', '%s\t%s\t%s\n' %
                        (f_time, json.dumps(tag2dist), total))
                    if profile_enabled:
                        # NOTE(review): the current round is folded into the
                        # profile before the previous day's profile is saved,
                        # so the first round of a new day presumably lands in
                        # the old day's profile - confirm this is intended.
                        self.update_profile_spatial(citys, pred_tags)
                        self.update_profile_topic(raw_stats, pred_tags)
                        if today != action_day:
                            self.save_profile(action_day)
                            self.reset_profile()
                            action_day = today
                    break
            except Exception as e:
                lexcept('Unknown exception %s' % e)
 def format_test(self, emoticon=True, parenthesis=True):
     """Load, preprocess and sparse-format the test data for this classifier type.

     Reads ``../test_data/<classifier_type>_test_data`` through
     ``ST.load_data``, runs ``ST.preprocess`` on the texts, maps every label
     through ``self.tag2index`` and hands the result to
     ``self.format_sparse`` together with the output path.

     Args:
         emoticon: selects the ``icon`` / ``no_icon`` suffix of the output
             path.
         parenthesis: accepted but not used by this method.

     Raises:
         KeyError: if a loaded label is missing from ``self.tag2index``.
     """
     # NOTE(review): the input path is relative to the working directory
     # while the output path is built from project_dir - confirm intended.
     test_path = '../test_data/%s_test_data' % self.classifier_type
     self._test_xs, self._test_ys = ST.load_data(test_path)
     linfo('begin preprocess test data, then sparse')
     self._raw_test_xs, self._test_xs = ST.preprocess(self._test_xs)
     # URL / target replacement (ST.replace_url, ST.replace_target) is
     # currently disabled.
     # A list rather than map(): on Python 3 a map object is a one-shot
     # iterator, which would leave self._test_ys exhausted after first use.
     self._test_ys = [self.tag2index[tag] for tag in self._test_ys]
     sparse_path = '%s/test_data/%s%s_sparse_test_data_%s' % (
         project_dir, self.flag_prefix, self.classifier_type,
         'icon' if emoticon else 'no_icon')
     self.format_sparse(self._test_xs, self._test_ys, sparse_path)
 def format_test(self, emoticon=True, parenthesis=True):
     """Build the sparse test-data file for the current classifier type.

     The test set is read with ``ST.load_data`` from
     ``../test_data/<classifier_type>_test_data``, the texts are passed
     through ``ST.preprocess``, the labels are translated with
     ``self.tag2index``, and both are given to ``self.format_sparse`` along
     with the destination path.

     Args:
         emoticon: chooses between the ``icon`` and ``no_icon`` suffix of
             the destination path.
         parenthesis: accepted but not used by this method.

     Raises:
         KeyError: if a loaded label has no entry in ``self.tag2index``.
     """
     # NOTE(review): input is resolved relative to the working directory,
     # output relative to project_dir - confirm this asymmetry is intended.
     test_path = '../test_data/%s_test_data' % self.classifier_type
     self._test_xs, self._test_ys = ST.load_data(test_path)
     linfo('begin preprocess test data, then sparse')
     self._raw_test_xs, self._test_xs = ST.preprocess(self._test_xs)
     # URL / target replacement (ST.replace_url, ST.replace_target) is
     # currently disabled.
     # Materialise the labels as a list: a Python 3 map object can only be
     # consumed once and has no len().
     self._test_ys = [self.tag2index[label] for label in self._test_ys]
     icon_suffix = 'icon' if emoticon else 'no_icon'
     sparse_path = '%s/test_data/%s%s_sparse_test_data_%s' % (
         project_dir, self.flag_prefix, self.classifier_type, icon_suffix)
     self.format_sparse(self._test_xs, self._test_ys, sparse_path)
 def run(self, sample_enabled=False, profile_enabled=False):
     """Train the classifiers, then classify online batches in an endless loop.

     Each round fetches a batch from ``self.psr.online_run(interval=10)``,
     splits every item into ``item[0]`` (kept as ``citys``) and ``item[1]``
     (the text handed to ``ST.preprocess``), drops entries whose
     preprocessed text is falsy, predicts with the first pair in
     ``self.classifiers`` and passes a
     ``time<TAB>json(distribution)<TAB>count`` line to
     ``save(path, 'a', ...)``.

     Args:
         sample_enabled: when true, write the first 300 raw texts of the
             round as a JSON line to a timestamped file under
             ``sample_prefix``.
         profile_enabled: when true, reset the profile before the loop,
             feed each round into ``update_profile_spatial`` /
             ``update_profile_topic`` and, once the date string changes,
             call ``save_profile`` for the previous day followed by
             ``reset_profile``.

     This method does not return.  Any exception raised inside a round is
     logged with ``lexcept`` and the loop continues.
     """
     for csf, _ in self.classifiers:
         csf.train()
     action_day = ET.format_time(time.localtime())[:10]
     if profile_enabled:
         self.reset_profile()
     while True:
         try:
             batch = self.psr.online_run(interval=10)
             if not batch:
                 continue

             linfo('-------roundly analysis----------')
             # List comprehensions instead of map(): the results are indexed
             # and measured below, which map objects do not support on
             # Python 3.
             citys = [item[0] for item in batch]
             raw_stats, stats = ST.preprocess([item[1] for item in batch])
             valid_ids = [i for i, txt in enumerate(stats) if txt]
             stats = [stats[i] for i in valid_ids]
             raw_stats = [raw_stats[i] for i in valid_ids]
             citys = [citys[i] for i in valid_ids]
             if sample_enabled:
                 f_t = ET.format_time(time.localtime())
                 stamp = f_t.replace(' ', '').replace('-', '').replace(':', '')
                 sample_path = '%srealtime_%s' % (sample_prefix, stamp)
                 ET.write_file(sample_path, 'a',
                               '%s\n' % json.dumps(raw_stats[:300]))

             if not stats:
                 # Every text was filtered out; computing the distribution
                 # would divide by zero.
                 linfo('no valid text after preprocess, skip this round')
                 continue

             #only one model supported at the same time now.
             for clf, path in self.classifiers:
                 tag2cnt = {'P': 0, 'N': 0, 'O': 0}
                 pred_tags = clf.predict(stats)
                 for tag in pred_tags:
                     tag2cnt[tag] += 1
                 total = len(stats)
                 tag2dist = {tag: cnt * 1.0 / total
                             for tag, cnt in tag2cnt.items()}
                 linfo('%s-roundly online sentiment distribution: %s'
                       % (clf, tag2dist))
                 f_time = ET.format_time(time.localtime())
                 today = f_time[:10]
                 save(path, 'a', '%s\t%s\t%s\n'
                      % (f_time, json.dumps(tag2dist), total))
                 if profile_enabled:
                     # NOTE(review): the current round is folded into the
                     # profile before the previous day's profile is saved,
                     # so the first round of a new day presumably lands in
                     # the old day's profile - confirm this is intended.
                     self.update_profile_spatial(citys, pred_tags)
                     self.update_profile_topic(raw_stats, pred_tags)
                     if today != action_day:
                         self.save_profile(action_day)
                         self.reset_profile()
                         action_day = today
                 break
         except Exception as e:
             lexcept('Unknown exception %s' % e)