def exc():
    """Classify every stored term, save the result table and return the accuracy.

    The labelled terms come from ``store.loadTermData()``; each record is read
    as ``(term, label)``.  The classifier is loaded from ``file_model`` when
    that module-level name is truthy, otherwise the default classifier is
    loaded.  One result row is built per term (prediction, class
    probabilities, an empty placeholder column, the preprocessed term and the
    feature vector) and everything is handed to ``store.saveTermTestResults``.

    Returns:
        The fraction of terms whose stored label equals the first value of
        the corresponding result row.

    Raises:
        ZeroDivisionError: if no term data was loaded.
    """
    # 5. Test
    records = store.loadTermData()
    termList = {
        'X': [record[0] for record in records],
        'y': [int(record[1]) for record in records],
    }

    print('=======================================================')
    print('=> Term Classifying...')

    clf = store.loadClassifier(file=file_model) if file_model else store.loadClassifier()

    results = []
    for term in termList['X']:
        cleaned_term = preprocess(term)
        X = np.asarray([extractFeatureText(term)])
        # NOTE(review): row 0 of predict() is concatenated with lists, so it is
        # assumed to convert to a list (2-D prediction output) -- confirm.
        predicted = clf.predict(X)[0].tolist()
        class_scores = clf.predict_proba(X)[0].tolist()
        results.append(predicted + class_scores + ['', cleaned_term] + X[0].tolist())

    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score', 'Address Score', 'Phone Score', '', 'Preprocessed_Term'] + \
            feature_names

    # Accuracy: share of rows whose leading value matches the stored label.
    hits = 0
    for expected, row in zip(termList['y'], results):
        if expected == row[0]:
            hits += 1
    tacc = hits / len(termList['y'])

    # The output file is prefixed with the model name, or with a timestamp
    # when no model file was configured.
    if file_model:
        prefix = file_model
    else:
        prefix = timeManage.getTime()
    store.saveTermTestResults(tacc, titles, termList, results, file=prefix + '_' + file_term_classify_result)

    return tacc
Ejemplo n.º 2
0
def templateFiler(clf, ptemplates):
    """Keep the candidate templates whose terms cover every class exactly once.

    A candidate is accepted only when the sorted predicted classes of its
    terms equal ``0 .. len(terms) - 1``.  Each accepted candidate becomes a
    dict that maps the field names ``'name'``, ``'address'`` and ``'phone'``
    (classes 0, 1 and 2) to ``{'term': ..., 'score': ...}``, where the score
    is the probability the classifier gave to the predicted class, plus a
    top-level ``'score'`` holding the sum of the logs of those probabilities.

    Args:
        clf: classifier exposing ``predict`` and ``predict_proba``.
        ptemplates: iterable of candidate templates, each a sequence of terms.

    Returns:
        The accepted template dicts, sorted by ``'score'`` in descending
        order (an empty list when no candidate qualifies).
    """
    field_names = {0: 'name', 1: 'address', 2: 'phone'}
    templates = []

    for terms in ptemplates:
        X = np.asarray([fe.extractFeatureText(term) for term in terms])
        cls = clf.predict(X)

        # tolist() already returns a fresh list, so no extra copy is needed
        # before sorting.
        predicted = sorted(cls.reshape((1, cls.shape[0])).tolist()[0])
        if predicted != list(range(len(terms))):
            continue

        probs = clf.predict_proba(X)
        dct = {}
        for term, cl, prob in zip(terms, cls, probs):
            # The former ``except ValueError`` fallback repeated the very same
            # ``int(cl)`` conversion and therefore could never recover; the
            # conversion is now done once and any error propagates directly.
            label = int(cl)
            dct[field_names[label]] = {'term': term, 'score': prob[label]}

        # Computed before the 'score' key is added, so only field entries
        # contribute to the sum.
        dct['score'] = sum(log(field['score']) for field in dct.values())
        templates.append(dct)

    templates.sort(key=lambda k: k['score'], reverse=True)
    return templates
Ejemplo n.º 3
0
def exc():
    """Classify every stored term, save the result table and return the accuracy.

    The labelled terms come from ``store.loadTermData()``; each record is read
    as ``(term, label)``.  The classifier is loaded from ``file_model`` when
    that module-level name is truthy, otherwise the default classifier is
    loaded.  One result row is built per term: prediction, class
    probabilities, an error flag (1 when the prediction differs from the
    stored label, else 0), the preprocessed term and the feature vector.

    Returns:
        The fraction of terms whose stored label equals the first value of
        the corresponding result row.

    Raises:
        ZeroDivisionError: if no term data was loaded.
    """
    # 5. Test
    tmp = store.loadTermData()
    termList = {'X': [row[0] for row in tmp], 'y': [int(row[1]) for row in tmp]}

    print('=======================================================')
    print('=> Term Classifying...')

    if file_model:
        clf = store.loadClassifier(file=file_model)
    else:
        clf = store.loadClassifier()
    results = []

    for term, label in zip(termList['X'], termList['y']):
        preprocessd_term = preprocess(term)
        X = np.asarray([extractFeatureText(term, getFeatureNames())])
        # Predict once and reuse the row for both the error flag and the
        # result table instead of running the classifier twice per term.
        predicted = clf.predict(X)[0].tolist()
        y_hat = predicted[0]
        results.append(predicted + clf.predict_proba(X)[0].tolist() +
                       [1 if y_hat != label else 0, preprocessd_term] + X[0].tolist())

    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score', 'Address Score', 'Phone Score', 'Error', 'Preprocessed_Term'] + \
            getFeatureNames()

    tacc = sum(1 for y, row in zip(termList['y'], results) if y == row[0]) / len(termList['y'])

    # The output file is prefixed with the model name, or with a timestamp
    # when no model file was configured.
    if file_model:
        store.saveTermTestResults(tacc, titles, termList, results, file=file_model + '_' + file_term_classify_result)
    else:
        store.saveTermTestResults(tacc, titles, termList, results, file=timeManage.getTime() + '_' + file_term_classify_result)

    return tacc
Ejemplo n.º 4
0
def templateFiler(clf, ptemplates):
    """Keep the candidate templates whose terms cover every class exactly once.

    A candidate is accepted only when the sorted predicted classes of its
    terms equal ``0 .. len(terms) - 1``.  Each accepted candidate becomes a
    dict that maps the field names ``'name'``, ``'address'`` and ``'phone'``
    (classes 0, 1 and 2) to ``{'term': ..., 'score': ...}``, where the score
    is the probability the classifier gave to the predicted class, plus a
    top-level ``'score'`` holding the sum of the logs of those probabilities.

    Args:
        clf: classifier exposing ``predict`` and ``predict_proba``.
        ptemplates: iterable of candidate templates, each a sequence of terms.

    Returns:
        The accepted template dicts, sorted by ``'score'`` in descending
        order (an empty list when no candidate qualifies).
    """
    field_names = {0: 'name', 1: 'address', 2: 'phone'}
    templates = []

    for terms in ptemplates:
        X = np.asarray([fe.extractFeatureText(term) for term in terms])
        cls = clf.predict(X)

        # tolist() already returns a fresh list, so no extra copy is needed
        # before sorting.
        predicted = sorted(cls.reshape((1, cls.shape[0])).tolist()[0])
        if predicted != list(range(len(terms))):
            continue

        probs = clf.predict_proba(X)
        dct = {}
        for term, cl, prob in zip(terms, cls, probs):
            # The former ``except ValueError`` fallback repeated the very same
            # ``int(cl)`` conversion and therefore could never recover; the
            # conversion is now done once and any error propagates directly.
            label = int(cl)
            dct[field_names[label]] = {'term': term, 'score': prob[label]}

        # Computed before the 'score' key is added, so only field entries
        # contribute to the sum.
        dct['score'] = sum(log(field['score']) for field in dct.values())
        templates.append(dct)

    templates.sort(key=lambda k: k['score'], reverse=True)
    return templates
Ejemplo n.º 5
0
def test(feature_func, preprocessing_func):
    """Classify every stored term with the default classifier and save the results.

    Args:
        feature_func: forwarded unchanged to ``extractFeatureText``.
        preprocessing_func: forwarded unchanged to ``extractFeatureText``.

    The term data is read from ``store.loadTermData()`` and indexed by the key
    ``'X'``.  One result row is built per term (prediction, class
    probabilities, an empty placeholder column, the preprocessed term and the
    feature vector) and the table is written with
    ``store.saveTermTestResults``.  Nothing is returned.
    """
    # 4. Test
    termList = store.loadTermData()

    print('=======================================================')
    print('=> Term Classifying...')

    clf = store.loadClassifier()
    results = []

    for term in termList['X']:
        # Direct call instead of eval() on a constant string; the name
        # ``preprocessing`` is resolved from module scope exactly as before.
        # NOTE(review): this does not use the ``preprocessing_func`` argument
        # -- confirm that is intended.
        preprocessd_term = preprocessing(term)
        X = np.asarray([
            extractFeatureText(feature_func, preprocessing_func, term)
        ])
        results.append(
            clf.predict(X)[0].tolist() + clf.predict_proba(X)[0].tolist() +
            ['', preprocessd_term] + X[0].tolist())

    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score', 'Address Score', 'Phone Score', '', 'Preprocessed_Term'] + \
            feature_names

    store.saveTermTestResults(titles, termList, results)
Ejemplo n.º 6
0
__author__ = 'Thong_Le'
import libs.features as fe
from libs.config import  *

# Sample term fed to the feature extractor.
text = 'Thon Xa Huyen'

# Extract the features of the sample term.
# NOTE(review): feature_func and preprocessing_func are not defined in this
# snippet; presumably they come from the star import of libs.config -- verify.
X = fe.extractFeatureText(feature_func, preprocessing_func, text)
# Bare expression statement; it has no effect.
None

Ejemplo n.º 7
0
__author__ = 'Thong_Le'
import libs.features as fe
from libs.config import *

# Sample term fed to the feature extractor.
text = 'Thon Xa Huyen'

# Extract the features of the sample term.
# NOTE(review): feature_func and preprocessing_func are not defined in this
# snippet; presumably they come from the star import of libs.config -- verify.
X = fe.extractFeatureText(feature_func, preprocessing_func, text)
# Bare expression statement; it has no effect.
None