Beispiel #1
0
 def test_remove_dots(self):
     """
         Check that remove_dots drops the ellipsis character ("…") and keeps
         everything before it, including a space directly in front of it.
     """
     cases = (
         ("String with three dot pattern…", "String with three dot pattern"),
         ("String with three dot pattern …", "String with three dot pattern "),
     )
     for text, expected in cases:
         self.assertEqual(ParseTools.remove_dots(text), expected)
Beispiel #2
0
    def test_extract_sentences_regular(self):
        """
            Check extract_sentences on strings whose sentences carry punctuation only at their very end.
        """

        # A single grammatically CORRECT sentence comes back as the only list element
        for sentence in (
                "Here is a declaritive sentence.",
                "Is this an interrogative sentence?",
                "This is an excalamtory sentence!",
        ):
            self.assertEqual(ParseTools.extract_sentences(sentence),
                             [sentence])

        # Several sentences separated by one space: first grammatically CORRECT
        # ones, then grammatically INCORRECT ones
        for parts in (
            [
                "This string has two sentences.",
                "Both of the sentences are grammatically correct."
            ],
            ["Sentences have this does two.", "Grammar not is."],
        ):
            self.assertEqual(ParseTools.extract_sentences(" ".join(parts)),
                             parts)
Beispiel #3
0
 def test_extract_apostrophe_words_string_without_apostrophe_words(self):
     """
         Check that extract_apostrophe_words returns an empty list for the empty string,
         for text without apostrophes, and for words whose only apostrophe is leading or trailing.
     """
     inputs = (
         "",
         "This sentence has no words with an apostrophe",
         "and'",
         "'and",
     )
     for text in inputs:
         self.assertEqual(ParseTools.extract_apostrophe_words(text), [])
Beispiel #4
0
 def test_get_random_string_lengths(self):
     """
         Check that get_random_string returns exactly as many characters as were requested.
     """
     for requested_length in (0, 1, 100):
         generated = ParseTools.get_random_string(requested_length)
         self.assertEqual(len(generated), requested_length)
Beispiel #5
0
 def test_is_proper_sentence_propers(self):
     """
         Check that is_proper_sentence accepts sentences that begin with a capital letter
         and end with punctuation (".", "...", "?" or "!").
     """
     proper_sentences = (
         "Regular sentence.",
         "Regular sentence...",
         "Regular sentence?",
         "Regular sentence!",
     )
     for sentence in proper_sentences:
         self.assertTrue(ParseTools.is_proper_sentence(sentence))
Beispiel #6
0
 def test_extract_apostrophe_words_string_with_apostrophe_words(self):
     """
         Check that extract_apostrophe_words returns the words containing an inner apostrophe,
         without any quote characters wrapped around them.
     """
     cases = (
         ("This sentence's got a word with an apostrophe", ["sentence's"]),
         ("doesn't", ["doesn't"]),
         ("'don't'", ["don't"]),
     )
     for text, expected in cases:
         self.assertEqual(ParseTools.extract_apostrophe_words(text),
                          expected)
Beispiel #7
0
 def test_replace_all_replaced(self):
     """
         Check that replace_all substitutes characters that are not among
         the characters meant to be kept: each emoji input below is expected
         to come back as two spaces.
     """
     for emoji in ("⚾️", "🇺🇸"):
         replaced = ParseTools.replace_all(string.printable, " ", emoji)
         self.assertEqual(replaced, "  ")
Beispiel #8
0
 def test_replace_all_not_replaced(self):
     """
         Check that replace_all returns a string unchanged when every one of its characters is among the keeps.
     """
     for text in (string.printable, "a"):
         self.assertEqual(
             ParseTools.replace_all(string.printable, " ", text), text)
Beispiel #9
0
 def test_extract_words_regular_words(self):
     """
         Check that extract_words picks out the words of a string written with common english
         punctuation (period, colon, dash), in order of appearance.
     """
     cases = (
         ("This string has words.", ["This", "string", "has", "words"]),
         ("Nonsense word: asd", ["Nonsense", "word", "asd"]),
         ("Dashed-phrase", ["Dashed", "phrase"]),
     )
     for text, expected_words in cases:
         self.assertEqual(ParseTools.extract_words(text), expected_words)
Beispiel #10
0
 def test_fix_ats(self):
     """
         Check that fix_ats rewrites twitter usernames in a tweet to a name from the
         given set and leaves tweets without usernames (including the empty string) unchanged.
     """
     test_at_set = {'@realDonaldTrump', '@joe'}
     cases = (
         ('Tweet with @realDOOOONALDTRUMP', 'Tweet with @realDonaldTrump'),
         ('Tweet with @jump', 'Tweet with @joe'),
         ('Tweet with no ats', 'Tweet with no ats'),
         ('', ''),
     )
     for tweet, expected in cases:
         self.assertEqual(ParseTools.fix_ats(tweet, test_at_set), expected)
Beispiel #11
0
 def test_extract_hts_strings_with_hts(self):
     """
         Check that extract_hts returns the hashtags of a string in their order of occurrence.
     """
     cases = (
         ("String with a few #Hash #Tags", ["#Hash", "#Tags"]),
         ("String with a #Hashtags in weird #Places",
          ["#Hashtags", "#Places"]),
     )
     for text, expected_hashtags in cases:
         self.assertEqual(ParseTools.extract_hts(text), expected_hashtags)
Beispiel #12
0
 def test_extract_words_quoted_words(self):
     """
         Check that extract_words strips single quotes wrapped around a word
         while keeping the apostrophe inside a word such as "don't".
     """
     cases = (
         ("String with a 'quoted' word",
          ["String", "with", "a", "quoted", "word"]),
         ("String with a 'quoted' word and don't",
          ["String", "with", "a", "quoted", "word", "and", "don't"]),
         ("String with 'quoted' words 'don't'",
          ["String", "with", "quoted", "words", "don't"]),
     )
     for text, expected_words in cases:
         self.assertEqual(ParseTools.extract_words(text), expected_words)
Beispiel #13
0
 def test_extract_ats_strings_with_ats(self):
     """
         Check that extract_ats returns the twitter usernames of a string in their order of occurrence.
     """
     cases = (
         ("String with a few @twitter @names", ["@twitter", "@names"]),
         ("@twitter String with names @cool in wide varitey of places @names",
          ["@twitter", "@cool", "@names"]),
     )
     for text, expected_names in cases:
         self.assertEqual(ParseTools.extract_ats(text), expected_names)
Beispiel #14
0
    def test_find_nearest_string_on_ats(self):
        """
            Check that find_nearest_string picks the closest twitter username among the candidates.
        """
        test_candidates = {"@realDonalTrump", "@FES", "@jack"}

        cases = (
            ("@trump", "@realDonalTrump"),
            ("@j", "@jack"),
        )
        for query, expected_match in cases:
            nearest = ParseTools.find_nearest_string(query, test_candidates)
            self.assertEqual(nearest, expected_match)
Beispiel #15
0
 def test_extract_sentences_twitter_words(self):
     """
         Check how extract_sentences behaves when it encounters twitter words:
         each tweet below is expected to come back whole as a single sentence.
     """
     tweets = (
         "@TrumpPeeLannin I am there and and you can see the proof?",
         "@TheBearthen   Thanks!",
     )
     for tweet in tweets:
         self.assertEqual(ParseTools.extract_sentences(tweet), [tweet])
Beispiel #16
0
 def test_remove_outer_quotes_quoted_string(self):
     """
         Check that remove_outer_quotes strips a matching pair of double or single quotes surrounding a string.
     """
     cases = (
         ('"String with double outer quotes"',
          'String with double outer quotes'),
         ("'String with single outer quotes'",
          "String with single outer quotes"),
     )
     for quoted, expected in cases:
         self.assertEqual(ParseTools.remove_outer_quotes(quoted), expected)
Beispiel #17
0
 def test_extract_ats_at_with_symbols(self):
     """
         Check that extract_ats still finds a twitter name when a symbol (colon after, period before) touches it.
     """
     texts = (
         "String @realDonaldTrump: that has twitter name-colon combo",
         ".@realDonaldTrump String that has twitter name-period combo",
     )
     for text in texts:
         self.assertEqual(ParseTools.extract_ats(text),
                          ["@realDonaldTrump"])
Beispiel #18
0
 def test_split_join_extra_space_remove(self):
     """
         Check that split_join strips leading/trailing spaces and
         collapses runs of spaces between words into a single space.
     """
     cases = (
         ("string with extra space at end ",
          "string with extra space at end"),
         (" word ", "word"),
         ("string  with  extra     spaces", "string with extra spaces"),
     )
     for text, expected in cases:
         self.assertEqual(ParseTools.split_join(text), expected)
Beispiel #19
0
 def test_remove_outer_quotes_invalid_cases(self):
     """
         Check that remove_outer_quotes returns a string unchanged when it is not wrapped in a
         pair of quotes (no quotes at all, or a quote on one side only).
     """
     untouched_inputs = (
         'String without outer quotes',
         'String with one outer quote"',
         '"String with one outer quote',
     )
     for text in untouched_inputs:
         self.assertEqual(ParseTools.remove_outer_quotes(text), text)
Beispiel #20
0
    def test_is_quoted_tweet_on_regular_strings(self):
        """
            Check that is_quoted_tweet rejects strings that are only quoted,
            or that only start with a twitter name and colon without surrounding quotes.
        """
        regular_strings = (
            '"String surrounded by double quotes"',
            "'String surrounded single quotes'",
            "@realDonaldTrump: Missing surrounding quotes",
            "@thequote: Missing surrounding quotes",
        )
        for text in regular_strings:
            self.assertFalse(ParseTools.is_quoted_tweet(text))
Beispiel #21
0
 def test_extract_ats_strings_without_ats(self):
     """
         Check that extract_ats returns an empty list for strings that contain no twitter usernames.
     """
     texts = (
         "String with no twitter names",
         "String with a twitter name without the at: realDonaldTrump",
         "String with just the @ symbol",
         "String with name like phrase @000",
     )
     for text in texts:
         self.assertEqual(ParseTools.extract_ats(text), [])
Beispiel #22
0
 def test_remove_at_prefixes_non_prefixed_strings(self):
     """
         Check that remove_at_prefixes returns strings that do not begin with a twitter username unchanged.
     """
     untouched_inputs = (
         "Tweet without an at prefix",
         "Tweet with an at @realDonaldTrump but no at prefix",
         "a @realDonalTrump @FES",
         "@ a",
     )
     for tweet in untouched_inputs:
         self.assertEqual(ParseTools.remove_at_prefixes(tweet), tweet)
Beispiel #23
0
 def test_split_join_alt_chars(self):
     """
         Check that split_join also removes extra space-like
         characters (newline, tab), both between words and at the end.
     """
     cases = (
         ("string with carriage \n return", "string with carriage return"),
         ("string with \t tab", "string with tab"),
         ("string with carriage return\n", "string with carriage return"),
         ("string with tab\t", "string with tab"),
     )
     for text, expected in cases:
         self.assertEqual(ParseTools.split_join(text), expected)
Beispiel #24
0
    def test_find_nearest_string_string_in_candidates(self):
        """
            Check that find_nearest_string maps a string that is itself one of the candidates to that same candidate.
        """
        test_candidates = {"Oranges", "Tomatoes", "Grapes"}

        # A tuple keeps the lookups in a fixed order (iterating the set would not)
        for candidate in ("Oranges", "Tomatoes", "Grapes"):
            nearest = ParseTools.find_nearest_string(candidate,
                                                     test_candidates)
            self.assertEqual(nearest, candidate)
    def mimic(examples, punctuation_odds=.05):
        """
            Build one imitation string per example from words sampled out of the examples.

            The word count of each imitation is drawn from a normal distribution with the
            mean and standard deviation of the examples' word counts, truncated to an int
            and clipped to at least 1. After each sampled word, each of '.', '?' and '!'
            is appended independently when a uniform draw is <= punctuation_odds.

            examples: iterable of strings to imitate.
            punctuation_odds: threshold compared against each uniform draw.
            Returns a list of strings whose words are joined by single spaces.
        """

        # Word lists are extracted once per example, in order
        words_per_example = [ParseTools.extract_words(example) for example in examples]
        all_words = [word for words in words_per_example for word in words]
        word_counts = [len(words) for words in words_per_example]

        count_mean = np.mean(word_counts)
        count_std = np.std(word_counts)

        # presumably a table with 'elements' / 'probabilities' columns, as
        # named in the sample() call below -- verify against element_distribution
        word_distribution = element_distribution(all_words)

        mimiced = []
        for _ in examples:
            drawn_length = int(np.random.normal(count_mean, count_std))
            sampled_length = np.clip(drawn_length, a_min=1, a_max=None)

            pieces = []
            for word in sample(word_distribution, element_column_name='elements', probability_column_name='probabilities', n=sampled_length):
                for punctuation in {'.', '?', '!'}:
                    if np.random.uniform() <= punctuation_odds:
                        word += punctuation
                pieces.append(word)

            mimiced.append(" ".join(pieces))

        return mimiced
def generate(model,
             gen_count,
             temperature,
             weight_adjust=None):
    """Generate tweets from `model`, keeping only the ones that finished.

    `custom_generate` is called `gen_count` times. A generation is kept only
    when it ends with the stop token '<s>' (it finished on its own rather
    than being cut off) and the text before the stop token contains letters.

    Args:
        model: Model object handed through to `custom_generate`.
        gen_count: Number of generation attempts to make.
        temperature: Sampling temperature handed through to `custom_generate`.
        weight_adjust: Mapping passed as `weight_adjustments` to
            `custom_generate`. Defaults to a weight of 2 for each of
            '.', '?', '!' and ','.

    Returns:
        List of kept generations with the stop token removed. It may hold
        fewer than `gen_count` items.
    """
    stop_token = '<s>'

    # Build the default per call: a dict literal in the signature would be
    # one shared object that any callee mutation would leak into later calls.
    if weight_adjust is None:
        weight_adjust = {'.': 2, '?': 2, '!': 2, ',': 2}

    generations = []

    for _ in range(gen_count):

        generation = custom_generate(model,
                                     temperature=temperature,
                                     weight_adjustments=weight_adjust,
                                     include_stop_token=True)

        # Skip generations that were stopped manually instead of finishing
        if not generation.endswith(stop_token):
            continue

        # Clip stop token; keep the tweet only if it is not just symbols
        tweet = generation[:-len(stop_token)]
        if ParseTools.contains_letters(tweet):
            generations.append(tweet)

    return generations
Beispiel #27
0
 def test_remove_hts_strings_with_hts(self):
     """
         Check that remove_hts deletes the hashtags from a string, leaving the spaces around them in place.
     """
     without_hashtags = ParseTools.remove_hts("String with a few #Hash #Tags")
     self.assertEqual(without_hashtags, "String with a few  ")
Beispiel #28
0
 def test_remove_ats_strings_with_ats(self):
     """
         Check that remove_ats deletes the twitter usernames from a string, leaving the spaces around them in place.
     """
     without_names = ParseTools.remove_ats(
         "String with a few @twitter @names")
     self.assertEqual(without_names, "String with a few  ")
Beispiel #29
0
 def test_remove_http_links_strings_without_http_links(self):
     """
         Check that remove_http_links returns a string without http links unchanged.
     """
     text = "String with no http links"
     self.assertEqual(ParseTools.remove_http_links(text), text)
Beispiel #30
0
 def test_remove_at_prefixes_prefixed_strings(self):
     """
         Check that remove_at_prefixes strips the twitter usernames a string starts with,
         returning the empty string when nothing but usernames is present.
     """
     cases = (
         ("@realDonalTrump a tweet with an initial at prefix",
          "a tweet with an initial at prefix"),
         ("@realDonalTrump @FES a tweet with initial at prefixes",
          "a tweet with initial at prefixes"),
         ("@realDonalTrump @FES", ""),
         (" @realDonalTrump @FES", ""),
     )
     for tweet, expected in cases:
         self.assertEqual(ParseTools.remove_at_prefixes(tweet), expected)