Example #1
0
 def create_file_input(self, message):
     """Write ``message`` to ``crf.test.data``, one element per line.

     Each line has the form ``<element> <group> <tag>`` followed by a
     newline, where ``<group>`` is ``self.get_group(element)`` and ``<tag>``
     is ``'B'`` for the first element and ``'I'`` for every other one.
     An empty ``message`` produces an empty file.

     :param message: sequence of strings (typically a str, iterated
         character by character).
     """
     fileUtil = FileUtil()
     lines = []
     for position, char in enumerate(message):
         # Only the very first element is tagged 'B'; the rest are 'I'.
         tag = 'B' if position == 0 else 'I'
         lines.append(char + ' ' + self.get_group(char) + ' ' + tag + '\n')
     # Join once instead of growing a string inside the loop.
     fileUtil.write_newfile('crf.test.data', ''.join(lines))
Example #2
0
    def crfpp(self, msg):
        """Split ``msg`` into segments using the external ``crf_test`` tool.

        Steps:
          1. ``CRF().create_file_input(msg)`` prepares the tool's input file.
          2. ``crf_test -m model1 crf.test.data`` is run through the shell
             and its output is redirected to ``crf.result``.
          3. ``self.process_ans`` turns the result lines into a tag list and
             the text to be cut.
          4. The text is cut at every index whose tag is ``'B'``.

        :param msg: the message to segment.
        :return: list of slices of the processed text, each one starting at
            a ``'B'`` index and ending just before the next ``'B'`` index
            (or at the end of the text for the last one). Text before the
            first ``'B'`` is not included. Returns ``[]`` when no tag equals
            ``'B'``.
        """
        crf = CRF()
        fileUtil = FileUtil()
        crf.create_file_input(msg)
        # NOTE(review): the exit status of crf_test is not checked.
        os.system('crf_test -m model1 crf.test.data > crf.result')

        lst = fileUtil.read_file('crf.result')
        lst_col3, str_ans = self.process_ans(lst)

        # Indices where a new segment begins.
        starts = [n for (n, e) in enumerate(lst_col3) if e == 'B']
        if not starts:
            # Nothing to cut on; previously this path raised
            # UnboundLocalError because the loop variable was never bound.
            return []

        # Each segment ends where the next one starts; the last one runs to
        # the end of the text. This also covers the single-'B' case.
        ends = starts[1:] + [len(str_ans)]
        return [str_ans[a:b] for a, b in zip(starts, ends)]
Example #3
0
    def crfpp(self, msg):
        """Split ``msg`` into segments using the external ``crf_test`` tool.

        Steps:
          1. ``CRF().create_file_input(msg)`` prepares the tool's input file.
          2. ``crf_test -m ../model1 crf.test.data`` is run through the shell
             and its output is redirected to ``crf.result``.
          3. ``self.process_ans`` turns the result lines into a tag list and
             the text to be cut.
          4. The text is cut at every index whose tag is ``'B'``.

        :param msg: the message to segment.
        :return: list of slices of the processed text, each one starting at
            a ``'B'`` index and ending just before the next ``'B'`` index
            (or at the end of the text for the last one). Text before the
            first ``'B'`` is not included. Returns ``[]`` when no tag equals
            ``'B'``.
        """
        crf = CRF()
        fileUtil = FileUtil()
        crf.create_file_input(msg)
        # NOTE(review): the exit status of crf_test is not checked.
        os.system('crf_test -m ../model1 crf.test.data > crf.result')

        lst = fileUtil.read_file('crf.result')
        lst_col3, str_ans = self.process_ans(lst)

        # Indices where a new segment begins.
        starts = [n for (n, e) in enumerate(lst_col3) if e == 'B']
        if not starts:
            # Nothing to cut on; previously this path raised
            # UnboundLocalError because the loop variable was never bound.
            return []

        # Each segment ends where the next one starts; the last one runs to
        # the end of the text. This also covers the single-'B' case.
        ends = starts[1:] + [len(str_ans)]
        return [str_ans[a:b] for a, b in zip(starts, ends)]
Example #4
0
 def create_file_output(self, message):
     # NOTE(review): only the first two statements of this method are
     # visible in this excerpt; how ``message``, ``result`` and ``fileUtil``
     # are used afterwards, and what is returned, cannot be confirmed here.
     result = []  # starts out empty
     fileUtil = FileUtil()  # project file helper instance
Example #5
0
import pickle, random, time, logging, sys, json, codecs
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nlp import CRFWordSegment
from utilfile import FileUtil
from sklearn.metrics import f1_score
import numpy as np
from data_bean import NewDataMapping

# Entries of data/resource/dict.txt with newline characters removed,
# loaded once at import time and de-duplicated into a set.
dict_list = {
    entry.replace('\n', '')
    for entry in FileUtil.read_file('data/resource/dict.txt')
}

# 'cos_main' logger at INFO level; every handler below shares one formatter.
log = logging.getLogger('cos_main')
log.setLevel(logging.INFO)
format = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)

# Handler 1: standard output.
ch = logging.StreamHandler(sys.stdout)
ch.setFormatter(format)
log.addHandler(ch)

# Handler 2: the cos_main.log file.
fh = logging.FileHandler("cos_main.log")
fh.setFormatter(format)
log.addHandler(fh)