Exemple #1
0
def test_language_detector():
    """Compare detected languages against the expected frame, column by column.

    Runs ``detect_languages_df`` on ``INPUT_DF`` using the ``input_text``
    column, sorts the result by ``input_text`` and asserts that every column
    of the result equals the same-named column of ``OUTPUT_DF``.
    """
    language_detector = LanguageDetector(minimum_score=0.2, fallback_language="es")
    detected = language_detector.detect_languages_df(INPUT_DF, "input_text")
    detected = detected.sort_values(by=["input_text"])
    # Only the columns present in the detector output are checked.
    for column_name in detected.columns:
        actual_values = detected[column_name].values
        expected_values = OUTPUT_DF[column_name].values
        np.testing.assert_array_equal(actual_values, expected_values)
Exemple #2
0
# -*- coding: utf-8 -*-
"""Language Detection recipe script"""

from plugin_config_loading import load_plugin_config_langdetect
from language_detector import LanguageDetector
from dku_io_utils import process_dataset_chunks, set_column_descriptions

# Configuration: load the recipe parameters and forward the detector options
params = load_plugin_config_langdetect()
detector = LanguageDetector(
    **{
        option: params[option]
        for option in ("language_scope", "minimum_score", "fallback_language")
    }
)

# Both steps below work on the same input/output dataset pair
dataset_kwargs = {
    "input_dataset": params["input_dataset"],
    "output_dataset": params["output_dataset"],
}

# Execution: apply the detector to the text column of the input dataset
process_dataset_chunks(
    text_column=params["text_column"],
    func=detector.detect_languages_df,
    **dataset_kwargs,
)

# Column descriptions are read from the detector only after processing has run
set_column_descriptions(
    column_descriptions=detector.column_descriptions,
    **dataset_kwargs,
)
                       presidenteSoloPostId_List[x] + '.json')
 sys.stdout = io.TextIOWrapper(sys.stdout.detach(), sys.stdout.encoding,
                               'backslashreplace')
 totalPositivos = 0
 totalComentarios = 0
 for data_json in datas_json:
     comentarios = []
     with open(
             data_json,
             mode='r',
             encoding='utf-8',
     ) as file:
         lector = json.load(file)
         for x in range(0, len(lector)):
             comentarios.append(lector[x]['message'])
     Id = LanguageDetector()
     # comentarios = [text for text in comentarios if Id.detect(text) == 'es']
     # for text in comentarios:
     #	print('{}: {}'.format(Id.detect(text), text))
     if len(comentarios) > 0:
         lista = cm.predict(comentarios, params)
         publiNoVacias += 1
     else:
         publiVacias += 1
         #todo with open(postIdsVacios )
     print(lista)
     comentariosPositivos = 0
     total = len(lista)
     totalComentarios += total
     print(total)
     for index in range(0, len(lista)):
Exemple #4
0
# -*- coding: utf-8 -*-

import sys
from language_detector import LanguageDetector

# Build the detector: the first command-line argument is converted to int and
# passed as ``ngrams_max``; "../data" is passed as ``data_dir``.
ld = LanguageDetector(ngrams_max=int(sys.argv[1]), data_dir="../data")
ld.process()

# Upper bound on the number of candidates printed per query.
top_n = 5

# Interactive loop: read a text, print the leading candidates, repeat.
while True:
    try:
        var = input("\nPlease enter the text: ")
    except (EOFError, KeyboardInterrupt):
        # End of input (closed stdin / Ctrl-D) or Ctrl-C: leave the loop
        # instead of terminating with a traceback.
        break
    results = ld.detect_language(var)
    # Slice instead of indexing a fixed range so that a result list shorter
    # than ``top_n`` does not raise IndexError.
    # NOTE(review): each entry is presumably (language, score) -- confirm
    # against LanguageDetector.detect_language.
    for rank, candidate in enumerate(results[:top_n], start=1):
        print(rank, candidate[0], candidate[1])