Python classifyの例、underthesea.classify Pythonの例

コード例 #1

0

ファイルを表示

ファイル: nlp_processing.py プロジェクト: vmtrandev/python-fanpage-scraping

def text_processing(post):
    """Return a copy of ``post`` enriched with sentiment and classification.

    ``textSentiment`` and ``textClassification`` are always computed from
    ``post['cleanedText']``. When ``post['imgTextLength']`` exists and is
    greater than zero, ``imgSentiment`` and ``imgClassification`` are also
    computed from ``post['cleanedImgText']``. The input dict is not mutated.
    """
    print('Start ' + post['tag'])

    # Decide up front whether the image text needs to be analysed as well.
    has_image_text = 'imgTextLength' in post and post['imgTextLength'] > 0

    enriched = dict(post)
    body_text = post['cleanedText']
    enriched['textSentiment'] = sentiment(body_text)
    enriched['textClassification'] = classify(body_text)

    if has_image_text:
        enriched['imgSentiment'] = sentiment(post['cleanedImgText'])
        enriched['imgClassification'] = classify(post['cleanedImgText'])

    return enriched

コード例 #2

0

ファイルを表示

ファイル: views.py プロジェクト: NguyenHauHN/underthesea.demo

def classification(request):
    """Classify the ``text`` field of a JSON request body.

    Args:
        request: object exposing the raw request body as bytes in
            ``request.body``; the body is decoded as UTF-8 and parsed as JSON.

    Returns:
        A ``JsonResponse`` wrapping ``{"output": <tags>}`` on success, or
        ``{"error": "Bad request!"}`` when the body cannot be decoded or
        parsed, has no ``"text"`` key, or classification raises.
    """
    try:
        text = json.loads(request.body.decode("utf-8"))["text"]
        result = {"output": uts.classify(text)}
    except Exception:
        # Deliberately not a bare ``except:``: that would also swallow
        # SystemExit and KeyboardInterrupt, which must propagate.
        result = {"error": "Bad request!"}
    return JsonResponse(result)

コード例 #3

0

ファイルを表示

ファイル: vr.py プロジェクト: manh16nd/int3411-voice-processing

def recognition(filename):
    """Transcribe the pre-processed recording and classify the transcript.

    Calls ``trim()`` and ``remove_noise()`` first, then reads
    ``upload/remove_noise.wav``, sends it to Google speech recognition with
    the ``vi-VN`` language, and classifies the recognised text.

    Note: ``filename`` is not read anywhere in this function; the audio path
    is fixed.

    Returns:
        ``{'text': <transcript>, 'subject': <classify result>}`` for the first
        matching file, or ``None`` when the WAV file does not exist.
    """
    trim()
    remove_noise()

    for wav_path in glob.glob("upload/remove_noise.wav"):
        with sr.AudioFile(wav_path) as source:
            recorded = r.record(source)
            transcript = r.recognize_google(recorded, language="vi-VN")
            topic = classify(transcript)
            return {'text': transcript, 'subject': topic}

コード例 #4

0

ファイルを表示

def classification(request):
    """Classify the ``text`` field of a JSON request body for a given domain.

    The JSON body must contain ``"text"`` and ``"domain"``. A domain equal to
    ``"general"`` is passed to the classifier as ``None``; any other value is
    forwarded unchanged.

    Args:
        request: object exposing the raw request body as bytes in
            ``request.body``; the body is decoded as UTF-8 and parsed as JSON.

    Returns:
        A ``JsonResponse`` wrapping ``{"output": <tags>}`` on success, or
        ``{"error": "Bad request!"}`` if anything in the try block raises
        (the exception is printed).
    """
    result = {}
    try:
        data = json.loads(request.body.decode("utf-8"))
        text = data["text"]
        # Compare by value: ``is not "general"`` tests object identity, which
        # is not guaranteed to match for a string parsed from JSON, so the
        # "general" domain was being forwarded instead of mapped to None.
        domain = data["domain"] if data["domain"] != "general" else None
        tags = uts.classify(text, domain=domain)
        result["output"] = tags
    except Exception as e:
        print(e)
        result = {"error": "Bad request!"}
    return JsonResponse(result)

コード例 #5

0

ファイルを表示

 def underthesea_annotate(self, text, mode):
     """Run the annotation function selected by ``mode`` on ``text``.

     Supported modes: ``sent_tokenize``, ``word_tokenize``, ``pos_tag``,
     ``chunk``, ``ner``, ``classify`` and ``sentiment``; each calls the
     function of the same name with ``text`` and returns its result.

     Raises:
         Exception: if ``mode`` is not one of the supported values.
     """
     # Handlers are wrapped in lambdas so that only the requested function
     # is looked up and invoked.
     handlers = (
         ('sent_tokenize', lambda: sent_tokenize(text)),
         ('word_tokenize', lambda: word_tokenize(text)),
         ('pos_tag', lambda: pos_tag(text)),
         ('chunk', lambda: chunk(text)),
         ('ner', lambda: ner(text)),
         ('classify', lambda: classify(text)),
         ('sentiment', lambda: sentiment(text)),
     )
     for name, run in handlers:
         if mode == name:
             return run()
     raise Exception("Wrong request, please check your request")

コード例 #6

0

ファイルを表示

ファイル: underthesea_text.py プロジェクト: dnkhanh45/demo-nlp-tasks-web-app

def _group_entities(ner_tokens):
    """Group named-entity phrases by entity type.

    Each item of ``ner_tokens`` is indexed as ``item[0]`` (token text) and
    ``item[3]`` (tag). Tags are split on ``-``: a first part of ``O`` means
    "outside any entity", otherwise the second part is the entity type.

    Returns:
        dict mapping entity type to the list of entity phrases, in order of
        appearance. Consecutive tokens of one entity are joined with a space.
    """
    grouped = {}
    words = []
    current_type = None
    for token in ner_tokens:
        tag_parts = str(token[3]).split('-')
        inside = tag_parts[0] != 'O' and len(tag_parts) > 1
        # A new entity begins on a "B" prefix (assumes IOB-style tags --
        # TODO confirm against the tagger) or when the entity type changes.
        starts_new = inside and (
            tag_parts[0] == 'B' or tag_parts[1] != current_type)
        if words and (not inside or starts_new):
            grouped.setdefault(current_type, []).append(' '.join(words))
            words = []
        if inside:
            current_type = tag_parts[1]
            words.append(str(token[0]))
    # Flush an entity that runs up to the final token; it used to be dropped.
    if words:
        grouped.setdefault(current_type, []).append(' '.join(words))
    return grouped


def underthesea_prc(text):
    """Run POS tagging, NER, classification and sentiment analysis on ``text``.

    Named entities are rendered as one ``"<TYPE>: phrase, phrase"`` string per
    entity type. The first classification label has underscores replaced by
    spaces and is passed through ``ViUtils.add_accents``.

    Returns:
        Whatever ``underthesea_text_result`` returns for
        ``(pos_tags, ner_text, classify_result, sentiment_result)``.
    """
    pos_tags = pos_tag(text)

    # Earlier code kept only the last token of a multi-token entity and lost
    # an entity ending on the final token; the helper accumulates every token.
    entities = _group_entities(ner(text))
    ner_text = [
        entity_type + ": " + ", ".join(phrases)
        for entity_type, phrases in entities.items()
    ]

    # NOTE(review): indexing [0] assumes classify() returns a non-empty
    # sequence for this text -- confirm behaviour for empty input.
    classify_result = ViUtils.add_accents(
        (classify(text)[0]).replace('_', ' '))

    sentiment_result = sentiment(text)

    return underthesea_text_result(pos_tags, ner_text, classify_result,
                                   sentiment_result)

コード例 #7

0

ファイルを表示

# Demo: word segmentation (tokenization).
# Two sample Vietnamese sentences; the word_tokenize calls below are disabled.
sentence_1 = u"Trong ngày sơ đồ 3-5-2 của Zinedine Zidane để lộ quá nhiều điểm yếu, Real Madrid đã phải trả giá bằng việc bỏ lỡ chuyến bay đến Thổ Nhĩ Kỳ."
sentence_2 = u"Thủ tướng đốc thúc triển khai nhiều tuyến cao tốc cấp thiết ở miền Tây"

# print(word_tokenize(sentence_1))
# print(word_tokenize(sentence_1, format="text"))

# print(word_tokenize(sentence_2))
# print(word_tokenize(sentence_2, format="text"))

# ------------------------------------------------------------------------------

# Demo: part-of-speech tagging (the returned data is a tuple whose first element is the value and whose second element is its label).
# (N = noun, A = adjective, E = preposition, Ny and Np = proper nouns, V = verb, R = adverb, C = conjunction, CH = punctuation, L = determiner)
text_1 = u"Chợ thịt chó nổi tiếng ở TP Hồ Chí Minh bị truy quyét"
# print(pos_tag(text_1))

text_2 = u"Một lô hàng bí ẩn đã hé lộ cách thức mà các đế chế ma túy châu Á đổi mới để qua mặt các nỗ lực truy quét của lực lượng chức năng."
# print(pos_tag(text_2))

# ------------------------------------------------------------------------------

# Demo: Vietnamese text classification.
# The return values of the calls below are discarded; wrap them in print()
# to see the predicted labels when running this as a script.
classify('HLV đầu tiên ở Premier League bị sa thải sau 4 vòng đấu')

classify('Hội đồng tư vấn kinh doanh Asean vinh danh giải thưởng quốc tế')

# Same call with an explicit domain argument.
classify('Lãi suất từ BIDV rất ưu đãi', domain='bank')

# ------------------------------------------------------------------------------

コード例 #8

0

ファイルを表示

 def test_classify_simple_case(self):
     """Bank-domain classification of an ATM-account question."""
     question = u"Mở tài khoản ATM thì có đc quà ko ad"
     self.assertEqual(
         ["CUSTOMER_SUPPORT"],
         classify(question, domain='bank'),
     )

コード例 #9

0

ファイルを表示

 def test_classify_simple_case_3(self):
     """Bank-domain classification of a sentence about BIDV interest rates."""
     sentence = u"Lãi suất từ BIDV rất ưu đãi"
     self.assertEqual(
         ['DISCOUNT'],
         classify(sentence, domain='bank'),
     )

コード例 #10

0

ファイルを表示

 def test_classify_simple_case_2(self):
     """Bank-domain classification that is expected to yield two labels."""
     complaint = u"Dkm t chuyển vẫn bị mất phí"
     self.assertEqual(
         ['INTEREST_RATE', 'MONEY_TRANSFER'],
         classify(complaint, domain='bank'),
     )

コード例 #11

0

ファイルを表示

ファイル: tokenizer.py プロジェクト: svseas/spiderum

    fileName = path + 'posts-' + str(i) + '.json'

    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default encoding (the posts contain non-ASCII text).
    with open(fileName, 'r', encoding='utf-8') as textFile:
        data = textFile.read()
        text = json.loads(data)

        # Use BeautifulSoup to strip all tags from the HTML body.
        soup = BeautifulSoup(text['body'], features='lxml')
        allText = ' '.join(soup.findAll(text=True))

        # Collect the per-post values that later become DataFrame columns.
        listOfContent.append(allText)
        listOfTitles.append(text['title'])
        listOfTokenizedText.append(word_tokenize(allText))
        listOfCategories.append(classify(text['title']))
        listOfCurrentCategory.append(text['cat_id']['name'])

print('Succesfully loaded all data ')

# One row per post, built from the lists filled while loading the files.
# NOTE(review): listOfCategories (the classify() output) is not part of the
# frame -- confirm that is intended.
column_names = ['Title', 'Body', 'Tokenized', 'Current Categories']
post_rows = list(
    zip(listOfTitles, listOfContent, listOfTokenizedText,
        listOfCurrentCategory))
df = pd.DataFrame(post_rows, columns=column_names)

print('DF CREATED!')

# Expand each cell into its own Series and stack, giving one flat list of
# values across all posts for each of the two columns.
tokenized_wide = df['Tokenized'].apply(pd.Series)
allContent = list(tokenized_wide.stack())
category_wide = df['Current Categories'].apply(pd.Series)
allCategory = list(category_wide.stack())
#get keywords and frequency

コード例 #12

0

ファイルを表示

ファイル: main.py プロジェクト: chiutuanbinh/nlp_model

def category_classify():
    """Classify the raw request payload and return the result via ``jsonify``.

    Reads ``request.data``, passes it unchanged to ``classify`` and wraps the
    returned value with ``jsonify``.
    """
    return jsonify(classify(request.data))

コード例 #13

0

ファイルを表示

ファイル: test_bank.py プロジェクト: winnyduong/underthesea

 def test_classify_simple_case(self):
     """Bank-domain classification of an ATM-account question."""
     question = u"Mở tài khoản ATM thì có đc quà ko ad"
     self.assertEqual(
         classify(question, domain='bank'),
         ("ACCOUNT", ),
     )

コード例 #14

0

ファイルを表示

ファイル: test_bank.py プロジェクト: winnyduong/underthesea

 def test_classify_simple_case_3(self):
     """Bank-domain classification of a sentence about BIDV interest rates."""
     sentence = u"Lãi suất từ BIDV rất ưu đãi"
     self.assertEqual(
         classify(sentence, domain='bank'),
         ('INTEREST RATE', ),
     )

コード例 #15

0

ファイルを表示

ファイル: test_bank.py プロジェクト: winnyduong/underthesea

 def test_classify_simple_case_2(self):
     """Bank-domain classification of praise for BIDV staff service."""
     feedback = u"Tôi rất thích cách phục vụ của nhân viên BIDV"
     self.assertEqual(
         classify(feedback, domain='bank'),
         ('CUSTOMER SUPPORT', ),
     )

コード例 #16

0

ファイルを表示

 def test_classify_null_cases(self):
     """Classifying an empty string is expected to compare equal to None."""
     self.assertEqual(classify(u""), None)

コード例 #17

0

ファイルを表示

 def test_classify_simple_case(self):
     """The first label for a football-coach headline should be "The thao"."""
     headline = u"HLV ngoại đòi gần tỷ mỗi tháng dẫn dắt tuyển Việt Nam 54"
     first_label = classify(headline)[0]
     self.assertEqual(first_label, Text("The thao"))

コード例 #18

0

ファイルを表示

ファイル: httpsvr.py プロジェクト: hnahtit/undertheseaApi

def classification():
    """Classify the ``text`` query parameter and return the labels as a string.

    Returns:
        The labels produced by ``classify`` concatenated without a separator,
        or an empty string when the ``text`` parameter is missing or empty, or
        when ``classify`` produces no labels.
    """
    # Read the text to classify from the query string.
    source_string = request.args.get('text')
    if not source_string:
        # Without this guard a missing parameter became the literal string
        # "None" and was classified as if the client had sent it.
        return ''
    labels = classify(str(source_string))
    # Guard against a None result so ''.join() does not raise TypeError.
    return ''.join(labels or [])