Python Preprocessing Beispiele, spanlp.domain.strategies.Preprocessing Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_punctuation():
    """RemovePunctuation drops commas, periods, question marks and parentheses.

    Both ways of supplying the input are checked: as arguments to
    ``clean()`` and as arguments to the ``Preprocessing`` constructor.
    """
    pipeline = [RemovePunctuation()]
    raw_text = "hola, que mas? esto. tiene varios, signos () de puntuación"
    wanted = "hola que mas esto tiene varios signos  de puntuación"

    # Input handed to clean() directly.
    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    # Input handed to the constructor, clean() called without arguments.
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #2

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_abbrevations():
    """RemoveAbbreviations deletes chat abbreviations such as "xfa", "pq" and "salu2".

    The same expectation is asserted for the ``clean()``-argument call style
    and for the constructor-argument call style.
    """
    pipeline = [RemoveAbbreviations()]
    raw_text = "xfa pongase el tapabocas pq me da es3 verlo sin eso. to2 debemos cuidarnos. chas gracias. salu2"
    wanted = "pongase el tapabocas me da verlo sin eso. debemos cuidarnos. gracias."

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #3

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_email_address():
    """RemoveEmailAddress strips an e-mail address and keeps the surrounding text.

    The same expectation is asserted for the ``clean()``-argument call style
    and for the constructor-argument call style.
    """
    strategies = [RemoveEmailAddress()]
    # The previous literal was the "[email protected]" placeholder that e-mail
    # obfuscation leaves behind in scraped pages; it contains no "@" and so is
    # not an address at all. A syntactically valid address is used instead.
    # NOTE(review): the address originally used by the test is unknown.
    message = "Contacto: usuario@example.com"
    expected = "Contacto: "
    cleaned1 = Preprocessing().clean(data=message, clean_strategies=strategies)
    cleaned2 = Preprocessing(data=message, clean_strategies=strategies).clean()
    assert cleaned1 == expected
    assert cleaned2 == expected

Beispiel #4

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_numbers():
    """RemoveNumbers deletes standalone digits and leaves adjacent punctuation.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveNumbers()]
    raw_text = "esto tiene el 1, el 2"
    wanted = "esto tiene el , el "

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #5

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_expand_abbrevations():
    """ExpandAbbreviations rewrites "xq", "es3", "bn" and "to2" as full words.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [ExpandAbbreviations()]
    raw_text = "pero xq tengo es3 si yo estaba bn en clase, ahora me duelen to2 los musculos"
    wanted = "pero por que tengo estres si yo estaba bien en clase, ahora me duelen todos los musculos"

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #6

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_adjectives():
    """RemoveAdjectives drops "tenebrosa", "fria", "oscura" and "luminoso".

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveAdjectives()]
    raw_text = "la voz era tenebrosa y la noche estaba fria y oscura hasta que de pronto algo luminoso apareció y"
    wanted = "la voz era y la noche estaba y hasta que de pronto algo apareció y"

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #7

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_html_tags():
    """RemoveHtmlTags strips the markup and keeps the text between the tags.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveHtmlTags()]
    raw_text = "<texto><strong>NLP:</strong> Natural y Limpia Programación ;)<br></texto>"
    wanted = "NLP: Natural y Limpia Programación ;)"

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #8

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_conjunctions():
    """RemoveConjunctions drops "y", "aunque" and "pero" from the sentence.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveConjunctions()]
    raw_text = "y entonces estaba programando aunque con sueño pero concentrado creando esta libreria"
    wanted = "entonces estaba programando con sueño concentrado creando esta libreria"

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #9

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_pronouns():
    """RemovePronouns drops the trailing run "yo tu usted ella nosotros".

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemovePronouns()]
    raw_text = "Siempre estamos usando los pronombres yo tu usted ella nosotros"
    wanted = "Siempre estamos usando los pronombres"

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #10

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_adverbs():
    """RemoveAdverbs drops "despues" and "lentamente" from the sentence.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveAdverbs()]
    raw_text = "muchos años despues frente al peloton de fusilamiento lentamente recordaba..."
    wanted = "muchos años frente al peloton de fusilamiento recordaba..."

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #11

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_articles():
    """RemoveArticles drops both occurrences of "los" from the sentence.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveArticles()]
    raw_text = "en los textos se usan muchos los articulos."
    wanted = "en textos se usan muchos articulos."

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #12

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_emoticons():
    """RemoveEmoticons drops "<3", ":)", ":D", ";)" and the "@}->--" rose.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveEmoticons()]
    raw_text = "Los emoticones <3 :) :D ;) son muy usados y esta rosa tambien @}->--"
    wanted = "Los emoticones son muy usados y esta rosa tambien"

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #13

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_accents():
    """RemoveAccents replaces accented vowels with their unaccented form.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveAccents()]
    raw_text = "Qué te parece la canción que cantó él?"
    wanted = "Que te parece la cancion que canto el?"

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #14

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_stop_words():
    """RemoveStopWords drops "en", "los", "hay", "y" and "por" from the text.

    This test was previously named ``test_remove_accents``, the same name as
    the RemoveAccents test. Two functions with one name in a module rebind
    that name, so only the later one is collected. The name now matches the
    strategy under test, so both tests run.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    strategies = [RemoveStopWords()]
    message = "en los textos hay muchas palabras vacias. y en sobretodo en los chats hay por montones"
    expected = "textos muchas palabras vacias. sobretodo chats montones"
    cleaned1 = Preprocessing().clean(data=message, clean_strategies=strategies)
    cleaned2 = Preprocessing(data=message, clean_strategies=strategies).clean()
    assert cleaned1 == expected
    assert cleaned2 == expected

Beispiel #15

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_prepositions():
    """RemovePrepositions drops "ante", "a", "mediante" and "entre".

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemovePrepositions()]
    raw_text = "ante todo es mejor cuidar a la naturaleza mediante buenas acciones. entre todos podemos."
    wanted = "todo es mejor cuidar la naturaleza buenas acciones. todos podemos."

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #16

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_hashtags():
    """RemoveHashtags deletes each "#word" and leaves the surrounding spaces.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveHashtags()]
    raw_text = "Miren, la #clase de #NLP es muy interesante pueden #aprender un monton de #algoritmos"
    wanted = "Miren, la  de  es muy interesante pueden  un monton de "

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #17

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_backticks():
    """RemoveBackTicks deletes every backtick and keeps the adjoining letters.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveBackTicks()]
    raw_text = "Vamos pa`lante porque pa`lante es pa` allá"
    wanted = "Vamos palante porque palante es pa allá"

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #18

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_urls():
    """RemoveUrls deletes the https link and leaves the "@team" mention intact.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveUrls()]
    raw_text = "Hola @team miren el link https://whoisjhonpuentes.web.app/ de ejemplo"
    wanted = "Hola @team miren el link  de ejemplo"

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #19

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_user_mentions():
    """RemoveUserMentions deletes "@jhon" and "@freddy", keeping nearby punctuation.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles.
    """
    pipeline = [RemoveUserMentions()]
    raw_text = "Hola @jhon, si viste que @freddy va a lanzar una nueva libreria Python para NLP?"
    wanted = "Hola , si viste que  va a lanzar una nueva libreria Python para NLP?"

    via_method = Preprocessing().clean(data=raw_text, clean_strategies=pipeline)
    via_constructor = Preprocessing(data=raw_text, clean_strategies=pipeline).clean()

    assert via_method == wanted
    assert via_constructor == wanted

Beispiel #20

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_numbers_to_consonants_in_lower_case():
    """NumbersToConsonantsInLowerCase maps 9, 7, 5 and 6 to lowercase consonants.

    The digits 0 and 3 in the sample are expected to stay untouched.
    """
    pipeline = [NumbersToConsonantsInLowerCase()]
    leet_text = "El 9a70 e5 negr0 y ju6u370n"
    result = Preprocessing().clean(data=leet_text, clean_strategies=pipeline)
    assert result == "El gat0 es negr0 y jugu3t0n"

Beispiel #21

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_numbers_attached_to_words():
    """RemoveNumbers drops standalone numbers and a word with digits attached.

    In the sample, "1234567", "3" and the whole token "rica20" are expected
    to disappear, leaving the separating spaces behind.

    This test was previously named ``test_remove_numbers``, the same name as
    another test in this module. The duplicate definition rebinds the name,
    so only one of the two is collected. The distinct name lets both run.
    """
    strategies = [RemoveNumbers()]
    cleaned = Preprocessing().clean(data="la comida 1234567 estaba 3 rica20", clean_strategies=strategies)
    assert cleaned == "la comida  estaba  "

Beispiel #22

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_extra_spaces1():
    """Cleaning ``None`` with RemoveExtraSpaces yields an empty string.

    Checked through both the ``clean()``-argument and the
    constructor-argument call styles, each with its own strategy instance.
    """
    via_method = Preprocessing().clean(
        data=None,
        clean_strategies=[RemoveExtraSpaces()],
    )
    via_constructor = Preprocessing(
        data=None,
        clean_strategies=[RemoveExtraSpaces()],
    ).clean()

    assert via_method == ""
    assert via_constructor == ""

Beispiel #23

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_extra_spaces():
    """RemoveExtraSpaces trims both ends and collapses inner runs to one space."""
    result = Preprocessing().clean(
        data="  con    muchos      espacios  ",
        clean_strategies=[RemoveExtraSpaces()],
    )
    assert result == "con muchos espacios"

Beispiel #24

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_numbers1():
    """RemoveNumbers turns a digits-only string into an empty string."""
    pipeline = [RemoveNumbers()]
    all_digits = "0123456789"
    result = Preprocessing().clean(data=all_digits, clean_strategies=pipeline)
    assert result == ""

Beispiel #25

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_numbers_to_vowels_in_lower_case():
    """NumbersToVowelsInLowerCase maps 0, 4 and 1 to lowercase vowels.

    The trailing 5 in the sample is expected to stay untouched.
    """
    pipeline = [NumbersToVowelsInLowerCase()]
    leet_text = "H0l4 m4r1c4, c0m0 v4m05"
    result = Preprocessing().clean(data=leet_text, clean_strategies=pipeline)
    assert result == "Hola marica, como vamo5"

Beispiel #26

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_numbers_to_consonants_in_upper_case():
    """NumbersToConsonantsInUpperCase maps 1, 6, 7, 5 and 9 to uppercase consonants.

    The digit 0 in the sample is expected to stay untouched.
    """
    pipeline = [NumbersToConsonantsInUpperCase()]
    leet_text = "E1 6A70 E5 NE9R0"
    result = Preprocessing().clean(data=leet_text, clean_strategies=pipeline)
    assert result == "EL GAT0 ES NEGR0"

Beispiel #27

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_text_to_upper():
    """TextToUpper upper-cases the text, including the accented "é"."""
    upper_only = [TextToUpper()]
    # Input is supplied to the constructor; clean() takes no arguments here.
    result = Preprocessing(data="estaré en mayuscula", clean_strategies=upper_only).clean()
    assert result == "ESTARÉ EN MAYUSCULA"

Beispiel #28

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_numbers_to_vowels_in_upper_case():
    """NumbersToVowelsInUpperCase maps 0, 4 and 1 to uppercase vowels.

    The trailing 5 in the sample is expected to stay untouched.
    """
    pipeline = [NumbersToVowelsInUpperCase()]
    leet_text = "H0L4 MAR1C4 C0M0 V4M05"
    result = Preprocessing().clean(data=leet_text, clean_strategies=pipeline)
    assert result == "HOLA MARICA COMO VAMO5"

Beispiel #29

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_to_lower2():
    """TextToLower lower-cases an all-uppercase sentence."""
    lower_only = [TextToLower()]
    shouted = "ESTARE EN MINUSCULA"
    # Input is supplied to clean() rather than to the constructor.
    lowered = Preprocessing().clean(data=shouted, clean_strategies=lower_only)
    assert lowered == 'estare en minuscula'

Beispiel #30

0

Datei anzeigen

Datei: test_palabrota.py Projekt: jfreddypuentes/spanlp

def test_remove_unicode_characters():
    """RemoveUnicodeCharacters drops "¥", "¶" and "Æ" and keeps the ASCII text."""
    pipeline = [RemoveUnicodeCharacters()]
    # Input is supplied to the constructor; clean() takes no arguments here.
    result = Preprocessing(data="mensaje ¥con ¶ unicodeÆ", clean_strategies=pipeline).clean()
    assert result == "mensaje con  unicode"