def _load(self):
    """Load JSON log sequences for every label tag into the dataset lists.

    For each ``label_tag`` in ``self._label_tags`` (its index is used as the
    class label) the entries of ``<self._source_path>/<label_tag>`` are
    listed, sliced to ``[self._start:self._end]`` and, when
    ``self._max_num > 0``, randomly down-sampled to at most
    ``self._max_num`` files. Every selected file is parsed as JSON and
    converted with ``self._sequence2feature``; the id, feature and label are
    appended to ``self._data_ids``, ``self._feature_data`` and
    ``self._label_data`` respectively.

    Files that cannot be read, parsed or converted are reported through
    ``log`` and ``print`` and skipped; loading continues with the next file.
    """
    log('[{time}] loading from {path}'.format(time=get_time(),
                                              path=self._source_path))
    for i, label_tag in enumerate(self._label_tags):
        path = os.path.join(self._source_path, label_tag)
        # NOTE: os.listdir() returns entries in arbitrary order, so the
        # [start:end] window is only as stable as the directory listing.
        files = os.listdir(path)[self._start:self._end]
        if self._max_num > 0:
            # `_max_num` is an upper bound: clamp it so that a label with
            # fewer files than the limit is loaded in full instead of
            # raising (assumes `sample` is random.sample -- TODO confirm).
            files = sample(files, min(self._max_num, len(files)))
        print('start: {}, end: {}'.format(self._start, self._end))
        print(len(files))
        pbar = ProgressBar(len(files))
        for j, filename in enumerate(files):
            filepath = os.path.join(path, filename)
            try:
                with open(filepath, 'r') as f:
                    log_sequence = json.load(f)
                feature = self._sequence2feature(log_sequence)
            except Exception:
                # Best-effort loading: skip the bad file, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare `except:` would.
                message = '[{time}] Failed to load file {filepath}'.format(
                    time=get_time(), filepath=filepath)
                log(message)
                print(message)
            else:
                # The id is the part of the file name before the first '.'
                # and the first '_'. `filename` is already the basename, so
                # no separator-specific splitting of the full path is needed.
                self._data_ids.append(filename.split('.')[0].split('_')[0])
                self._feature_data.append(feature)
                self._label_data.append(i)
            pbar.updateBar(j)
Ejemplo n.º 2
0
 def _load(self):
     """Load every JSON feature sequence found under each label directory.

     For each ``label_tag`` in ``self._label_tags`` (its index is used as the
     class label) all entries of ``<self._source_path>/<label_tag>`` are
     parsed as JSON and converted with ``self._sequence2feature``. The file
     name, feature and label are appended to ``self._data_ids``,
     ``self._feature_data`` and ``self._label_data`` respectively.

     Files that cannot be opened or decoded are reported and skipped.
     """
     for i, label_tag in enumerate(self._label_tags):
         path = os.path.join(self._source_path, label_tag)
         print('\nLoading {} data...'.format(label_tag))
         # List the directory once so the progress bar size and the
         # iteration below are guaranteed to agree.
         filenames = os.listdir(path)
         pbar = ProgressBar(len(filenames))
         for j, filename in enumerate(filenames):
             filepath = os.path.join(path, filename)
             try:
                 with open(filepath, 'rb') as f:
                     feature_seq = json.load(f)
                 feature = self._sequence2feature(feature_seq)
             except (EOFError, IOError, OSError, ValueError):
                 # json.load() signals malformed input with ValueError
                 # (JSONDecodeError), never EOFError, and open() raises
                 # IOError/OSError; catching only EOFError let one corrupt
                 # file abort the whole load.
                 print('[{time}] Failed to load file {filepath}'.format(
                     time=get_time(), filepath=filepath))
             else:
                 # `filename` is already the basename of `filepath`.
                 self._data_ids.append(filename)
                 self._feature_data.append(feature)
                 self._label_data.append(i)
             pbar.updateBar(j)
Ejemplo n.º 3
0
    def _load(self):
        """Load behaviour sequences selected by per-date trigger files.

        For every ``label_tag`` in ``self._label_tags`` (its index is used as
        the class label) and every directory in ``self._source_path_list``,
        the ids to load are read from JSON trigger files located at
        ``<SAVE_DIR_BASE>/trigger/<ds>_<suffix>``, where ``ds`` is the last
        path component of the source directory:

        * ``'pos'``: ids from ``<ds>_pos``.
        * ``'neg'``: ``len(ids_pos)`` ids drawn (with a fixed seed) from the
          ids in ``<ds>_total`` that are not in ``<ds>_pos``.
        * any other tag: ids from ``<ds>_<label_tag>`` (ids to predict),
          loaded without down-sampling.

        For ``'pos'``/``'neg'`` the ids are further down-sampled to at most
        ``self._max_num`` when it is non-zero. Each id is opened as a file
        under the source directory, parsed as JSON and converted with
        ``self._sequence2feature``; results are appended to
        ``self._data_ids``, ``self._feature_data`` and ``self._label_data``.

        Files that fail to load or convert are reported and skipped.

        Raises:
            ValueError: from ``random.sample`` if there are fewer negative
                candidates than positive ids, or ``self._max_num`` is
                negative.
        """

        def read_trigger_ids(ds, suffix):
            """Announce and load the JSON ids stored in trigger/<ds>_<suffix>."""
            trigger_path = os.path.join(SAVE_DIR_BASE, 'trigger',
                                        '{}_{}'.format(ds, suffix))
            print('Trigger file: {}'.format(trigger_path))
            with open(trigger_path) as f:
                return json.load(f)

        def sample_or_not(ids, max_num):
            """Return ``ids`` unchanged if ``max_num`` is 0, else a sample."""
            if max_num == 0:
                return ids
            # `max_num` is an upper bound: clamp so fewer available ids
            # than the limit does not raise.
            return random.sample(ids, min(max_num, len(ids)))

        log('[{time}] loading from {path}'.format(time=get_time(),
                                                  path=self._source_path_list))
        print(self._label_tags)
        # Iterate over the labels, e.g. ['neg', 'pos'].
        for i, label_tag in enumerate(self._label_tags):
            print('label_tag: {}'.format(label_tag))

            # Iterate over the date directories [ds_1, ds_2, ...].
            for source_path in self._source_path_list:
                print('Loading data from DIR: {}'.format(source_path))
                # normpath() drops a trailing separator, which would
                # otherwise make the last component an empty string.
                ds = os.path.basename(os.path.normpath(source_path))

                if label_tag not in ('pos', 'neg'):
                    # Ids to predict: used as-is, no sampling.
                    ids = read_trigger_ids(ds, label_tag)
                else:
                    # Positive ids are needed for both 'pos' and 'neg'.
                    ids_pos = read_trigger_ids(ds, 'pos')
                    if label_tag == 'neg':
                        # Negatives: everything in 'total' that is not
                        # positive, balanced to the number of positives.
                        ids_total = read_trigger_ids(ds, 'total')
                        random.seed(1)
                        # Sort the candidates: set iteration order is not
                        # stable across processes, so without this the fixed
                        # seed would not give a reproducible sample.
                        candidates = sorted(set(ids_total) - set(ids_pos))
                        ids_neg = random.sample(candidates, len(ids_pos))
                        ids = sample_or_not(ids_neg, self._max_num)
                    else:
                        ids = sample_or_not(ids_pos, self._max_num)
                print('label: {}, num: {}'.format(label_tag, len(ids)))

                # Iterate over the behaviour sequences and extract features.
                pbar = ProgressBar(len(ids))
                for j, filename in enumerate(ids):
                    filepath = os.path.join(source_path, filename)
                    try:
                        with open(filepath, 'r') as f:
                            log_sequence = json.load(f)
                        feature = self._sequence2feature(log_sequence)
                    except Exception as e:
                        # Best-effort loading: report and skip the file.
                        message = (
                            '[{time}] Failed to load file {filepath}'.format(
                                time=get_time(), filepath=filepath))
                        log(message)
                        print(message, e)
                    else:
                        self._data_ids.append(os.path.basename(filepath))
                        self._feature_data.append(feature)
                        self._label_data.append(i)
                    pbar.updateBar(j)