Beispiel #1
0
    def get_most_used_words_and_tweets(self,
                                       screen_name=None,
                                       language='spanish'):
        """Build the words-and-tweets matrix from ``get_last_week_tweets``.

        Args:
            screen_name: Forwarded unchanged to ``self.get_last_week_tweets``.
            language: Language name forwarded to
                ``word_frequency.word_frequency``; defaults to ``'spanish'``.

        Returns:
            Whatever ``self.create_words_and_tweets_matrix`` returns for the
            fetched tweets and the first ten entries of the word-frequency
            result (presumably the most frequent words -- confirm the
            ordering against ``word_frequency``).

        Raises:
            ValueError: If fetching the tweets fails with
                ``requests.exceptions.ConnectionError``.
        """
        try:
            tweets = self.get_last_week_tweets(screen_name)
        except requests.exceptions.ConnectionError:
            raise ValueError('No hay conexion a Internet')

        # str.join copes with an empty tweet list (reduce() without an initial
        # value raises TypeError there) and avoids repeated concatenation.
        joined_tweets = ', '.join(tweets)
        counted_words = word_frequency.word_frequency(joined_tweets,
                                                      language)[0:10]
        return self.create_words_and_tweets_matrix(tweets, counted_words)
def test_wikipedia_definition_text_french():
    """A French sentence must yield fifteen words, each reported at 6.7."""
    sentence = (
        "Un texte est une série orale ou écrite de mots perçus comme constituant "
        "un ensemble cohérent, porteur de sens et utilizant les structures"
    )
    kept_words = [
        'texte', 'série', 'orale', 'écrite', 'mots', 'perçus', 'comme',
        'constituant', 'ensemble', 'cohérent', 'porteur', 'sens',
        'utilizant', 'les', 'structures',
    ]
    # Every expected word carries the same share in this sentence.
    expected = [(word, 6.7) for word in kept_words]
    assert word_frequency(sentence, 'french') == expected
def test_param_none():
    """Passing None as the text must raise ValueError."""
    invalid_text = None
    with pytest.raises(ValueError):
        word_frequency(invalid_text)
def test_stop_words_apostophe_and_puntuation_english():
    """English text with an apostrophe and punctuation runs keeps one word."""
    text = "you're,,,,,,, amazing....."
    expected = [("amazing", 100)]
    assert word_frequency(text, "english") == expected
def test_two_words():
    """The same Spanish word twice is reported once at 100."""
    result = word_frequency('hola hola', 'spanish')
    assert result == [('hola', 100)]
def test_three_words_italian():
    """Italian text with one repeated word: ciao at 66.7, donna at 33.3."""
    expected = [("ciao", 66.7), ("donna", 33.3)]
    actual = word_frequency("ciao ciao donna", "italian")
    assert actual == expected
def test_with_commas_italian():
    """Comma-separated Italian text: raul at 33.3, four other words at 16.7."""
    text = "Ciao, mi chiamo Raul, piacere di conoscerti, sono Raul"
    expected = [
        ("raul", 33.3),
        ("ciao", 16.7),
        ("chiamo", 16.7),
        ("piacere", 16.7),
        ("conoscerti", 16.7),
    ]
    assert word_frequency(text, "italian") == expected
def test_with_commas_german():
    """Punctuated German text: raul at 40, three other words at 20 each."""
    text = "Hallo, ich bin Raul und freue mich, Sie kennenzulernen. Ich bin Raul"
    expected = [
        ("raul", 40),
        ("hallo", 20),
        ("freue", 20),
        ("kennenzulernen", 20),
    ]
    assert word_frequency(text, "german") == expected
def test_three_same_words():
    """The same Spanish word three times is reported once at 100."""
    frequencies = word_frequency("hola hola hola", "spanish")
    assert frequencies == [("hola", 100)]
def test_with_more_than_one_space():
    """Runs of spaces between repeated words still give one entry at 100."""
    # No language argument: the callee's default is exercised here.
    spaced_text = "hola              hola        hola"
    assert word_frequency(spaced_text) == [("hola", 100)]
def test_three_words_german():
    """German text with one repeated word: schwule at 66.7, hallo at 33.3."""
    expected = [("schwule", 66.7), ("hallo", 33.3)]
    actual = word_frequency("schwule hallo schwule", "german")
    assert actual == expected
def test_words_upper_case_lower_case_french():
    """Case variants of one French word merge into a lower-case entry."""
    mixed_case_text = 'sAlut Salut SALUT sAlUt saluT'
    result = word_frequency(mixed_case_text, 'french')
    assert result == [('salut', 100)]
def test_with_commas_french():
    """Comma-separated French text: juan at 33.3, four other words at 16.7."""
    text = "Bonjour, je m'appelle Juan, revi de vous rencontrer, je suis Juan"
    expected = [
        ('juan', 33.3),
        ('bonjour', 16.7),
        ("m'appelle", 16.7),  # the apostrophe is expected to be kept
        ('revi', 16.7),
        ('rencontrer', 16.7),
    ]
    assert word_frequency(text, 'french') == expected
def test_three_word_french():
    """French text with one repeated word: salut at 66.7, adieu at 33.3."""
    expected = [('salut', 66.7), ('adieu', 33.3)]
    actual = word_frequency('salut adieu salut', 'french')
    assert actual == expected
def test_two_word_french():
    """The same French word twice is reported once at 100."""
    result = word_frequency('salut salut', 'french')
    assert result == [('salut', 100)]
def test_with_exclamations_and_interrogations_english():
    """Exclamation and question marks are dropped; three words at 33.3."""
    # No language argument: the callee's default is exercised here.
    text = "today!!!!! is a great day???????"
    expected = [("today", 33.3), ("great", 33.3), ("day", 33.3)]
    assert word_frequency(text) == expected
def test_only_stop_words_english():
    """The English phrase 'how are you' must produce an empty result."""
    result = word_frequency("how are you", "english")
    assert result == []
def test_text_with_more_than_one_space_english():
    """Two words separated by several spaces are each reported at 50."""
    # No language argument: the callee's default is exercised here.
    expected = [("hello", 50), ("buddy", 50)]
    assert word_frequency("hello   buddy") == expected
def test_two_words_german():
    """The same German word twice is reported once at 100."""
    frequencies = word_frequency("hallo hallo", "german")
    assert frequencies == [("hallo", 100)]
def test_two_same_words_one_word_different():
    """Spanish text with one repeated word: hola at 66.7, adios at 33.3."""
    expected = [("hola", 66.7), ("adios", 33.3)]
    actual = word_frequency("hola adios hola", "spanish")
    assert actual == expected
def test_words_upper_case_lower_case_german():
    """Case variants of one German word merge into a lower-case entry."""
    mixed_case_text = "schwule ScHWule SCHWULE SchWUle SCHWule"
    result = word_frequency(mixed_case_text, "german")
    assert result == [("schwule", 100)]
def test_punctuation_signs():
    """Text made only of punctuation marks must produce an empty result."""
    # No language argument: the callee's default is exercised here.
    only_punctuation = '¿¿¿¿????? ¡¡¡¡ ???? !!!!! ... ,,,,'
    assert word_frequency(only_punctuation) == []
def test_use_stopwords_german():
    """German text mixing 'hallo' with other words keeps four entries."""
    text = "hallo vom hallo von vor hallo wann warum hallo was weiter hallo weitere hallo"
    expected = [
        ("hallo", 66.7),
        ("wann", 11.1),
        ("warum", 11.1),
        ("weitere", 11.1),
    ]
    assert word_frequency(text, "german") == expected
def test_empty_list():
    '''
    An empty string must produce an empty result.

    Bug found by the GitHub user https://github.com/enriquesanchezb
    '''
    result = word_frequency('')
    assert result == []
def test_two_words_italian():
    """The same Italian word twice is reported once at 100."""
    frequencies = word_frequency("ciao ciao", "italian")
    assert frequencies == [("ciao", 100)]
def test_param_integer():
    """Passing an integer as the text must raise ValueError."""
    invalid_text = 124
    with pytest.raises(ValueError):
        word_frequency(invalid_text)
def test_words_upper_case_lower_case_italian():
    """Case variants of one Italian word merge into a lower-case entry."""
    mixed_case_text = "DONNA donna DOnna DOnNA doNNA"
    result = word_frequency(mixed_case_text, "italian")
    assert result == [("donna", 100)]
def test_param_float():
    """Passing a float as the text must raise ValueError."""
    invalid_text = 1.12334
    with pytest.raises(ValueError):
        word_frequency(invalid_text)
def test_param_bool():
    """Passing a boolean as the text must raise ValueError."""
    invalid_text = False
    with pytest.raises(ValueError):
        word_frequency(invalid_text)
def test_stop_words_apostophe_english():
    """English text with an apostrophe contraction keeps only 'amazing'."""
    result = word_frequency("You're amazing", "english")
    assert result == [("amazing", 100)]