Beispiel #1
0
    def transform(self, data_type='train'):
        """Convert the raw text file for ``data_type`` into a CSV file.

        The directory is the ``PATH``/``raw`` value of ``self.conf``. The
        file ``<dir>/<data_type>.txt`` is read with ``load_txt`` and the
        loaded data is handed to ``write_csv`` for
        ``<dir>/raw.<data_type>.csv``.

        Args:
            data_type: File-name fragment selecting the data split;
                defaults to ``'train'``.
        """
        base_dir = self.conf.get('PATH', 'raw')
        records = load_txt('{}/{}.txt'.format(base_dir, data_type))
        write_csv('{}/raw.{}.csv'.format(base_dir, data_type), records)
Beispiel #2
0
    def save_preds(self, indexs, preds, model_name, data_type, cv_id, cv_num):
        """Write predictions to a ``.preds`` file under ``self.run_path``.

        The output holds a ``label_id_pred`` column and, only when
        ``indexs`` is truthy, an ``index`` column placed before it. The
        target file is
        ``<run_path>/<model_name>_<cv_id>_<cv_num>.<data_type>.preds``.

        Args:
            indexs: Values for the ``index`` column; skipped when falsy.
            preds: Values for the ``label_id_pred`` column.
            model_name: Model identifier used in the file name.
            data_type: Data split identifier used in the file name.
            cv_id: Cross-validation fold identifier used in the file name.
            cv_num: Cross-validation fold count used in the file name.
        """
        # Start with the optional column so it stays ahead of the predictions.
        columns = {'index': indexs} if indexs else {}
        columns['label_id_pred'] = preds

        target = '{}/{}_{}_{}.{}.preds'.format(
            self.run_path, model_name, cv_id, cv_num, data_type)
        write_csv(target, columns)
Beispiel #3
0
    def save_valid_preds(self, valid_indexs, valid_preds, model_name, cv_id,
                         cv_num):
        """Write validation predictions to a ``.preds`` file.

        Both the ``index`` and ``label_id_pred`` columns are always
        written. The target file is
        ``<run_path>/<model_name>_<cv_id>_<cv_num>.valid.preds``.

        Args:
            valid_indexs: Values for the ``index`` column.
            valid_preds: Values for the ``label_id_pred`` column.
            model_name: Model identifier used in the file name.
            cv_id: Cross-validation fold identifier used in the file name.
            cv_num: Cross-validation fold count used in the file name.
        """
        columns = {
            'index': valid_indexs,
            'label_id_pred': valid_preds,
        }
        target = '{}/{}_{}_{}.{}.preds'.format(
            self.run_path, model_name, cv_id, cv_num, 'valid')
        write_csv(target, columns)
Beispiel #4
0
 def count(self, data_name, data_type):
     """Run ``count_row`` over a CSV file and write the aggregated result.

     Each row of ``<raw>/<data_name>.<data_type>.csv`` (``<raw>`` being the
     ``PATH``/``raw`` config value) is read as a dict and passed to
     ``self.count_row``. The per-row results are combined by
     ``self.aggregate`` and handed to ``write_csv`` for
     ``<raw>/<self.get_data_name()>.<data_type>.csv``.

     Args:
         data_name: Name fragment of the input file.
         data_type: Data split fragment of the input and output files.
     """
     source_csv = '{}/{}.{}.csv'.format(self.conf.get('PATH', 'raw'),
                                        data_name, data_type)
     with open(source_csv) as handle:
         # The reader must be consumed while the file is still open.
         counted = [self.count_row(record)
                    for record in csv.DictReader(handle)]
     summary = self.aggregate(counted)
     target_csv = '{}/{}.{}.csv'.format(self.conf.get('PATH', 'raw'),
                                        self.get_data_name(), data_type)
     write_csv(target_csv, summary)
Beispiel #5
0
 def cut(self, data_type):
     """Tokenize the '内容' column with jieba and write the result as CSV.

     Reads ``<raw>/raw.<data_type>.csv`` through ``read_csv`` (``<raw>`` is
     the ``PATH``/``raw`` config value). Every entry of the '内容' column
     is split with ``jieba.cut``; the UTF-8 encoded tokens are joined with
     ``'#_#'`` and collected under the ``'jieba'`` key. When the input has
     a '标签' column it is copied through unchanged. The result is handed
     to ``write_csv`` for ``<raw>/<self.get_date_name()>.<data_type>.csv``.

     Args:
         data_type: Data split fragment of the input and output files.
     """
     # Look the directory up once and reuse it for both input and output.
     raw_path = self.conf.get('PATH', 'raw')
     data = read_csv('{}/raw.{}.csv'.format(raw_path, data_type))
     jieba_data = {'jieba': list()}
     if '标签' in data:
         jieba_data['标签'] = data['标签']
     for content in data['内容']:
         # NOTE(review): joining encoded tokens with a str separator only
         # works where encode() returns str (Python 2) - confirm the
         # target interpreter before changing this.
         words = [word.encode('utf8') for word in jieba.cut(content)]
         jieba_data['jieba'].append('#_#'.join(words))
     # NOTE(review): get_date_name may be a misspelling of get_data_name -
     # confirm against the enclosing class.
     write_csv('{}/{}.{}.csv'.format(raw_path, self.get_date_name(), data_type), jieba_data)