def test_freq_analysis(self):
        """Check the per-letter buckets built by bar_chart.freq_analysis.

        Three properties are exercised with input from random_string:
        every letter of a lowercase string is a key whose value holds one
        copy of that letter per occurrence; a string made only of
        punctuation and whitespace yields an empty dictionary; and an
        uppercase string is recorded under lowercase keys.
        """
        test_string = random_string(20, string.ascii_lowercase)
        analysis = bar_chart.freq_analysis(test_string)

        for element in set(test_string):
            # Each distinct letter of the input must be a key.
            self.assertIn(element, analysis)
            # The value stored for a letter contains only that letter.
            for i in analysis[element]:
                self.assertEqual(element, i)
            # The value has one entry per occurrence of the letter.
            self.assertEqual(test_string.count(element),
                             len(analysis[element]))

        # Test that it skips non-letters.
        test_string = random_string(20, string.punctuation + string.whitespace)
        analysis = bar_chart.freq_analysis(test_string)
        self.assertDictEqual(analysis, {})

        # Test that it converts uppercase to lowercase.
        test_string = random_string(20, string.ascii_uppercase)
        analysis = bar_chart.freq_analysis(test_string)
        # Without this check the loop below would pass vacuously if the
        # uppercase letters were dropped instead of converted.
        for element in set(test_string.lower()):
            self.assertIn(element, analysis)
        for key in analysis:
            self.assertTrue(key.islower())
Example #2
0
 def test_cleanup_list(self):
     """Check that cleanup_list drops the one-character words.

     The input is 13 strings from random_string(1) followed by 10 strings
     from random_string(5); only the 10 longer ones should survive.
     """
     single_letters = [random_string(1) for _ in range(13)]
     longer_words = [random_string(5) for _ in range(10)]
     cleaned = cleanup_dictionary.cleanup_list(single_letters + longer_words)
     # Exactly the ten longer words remain ...
     self.assertEqual(len(cleaned), 10)
     # ... and each of them still has its full length.
     for word in cleaned:
         self.assertEqual(len(word), 5)
 def test_get_id(self):
     """Check anagram_generator.get_id against LETTER_PRIME_DICT.

     A single letter must map to its LETTER_PRIME_DICT entry, and a longer
     string must map to the product of the entries of all its letters.
     """
     # Single random letter: the ID is the table entry itself.
     letter = random_string(1, ascii_lowercase)
     letter_id = anagram_generator.get_id(letter)
     self.assertEqual(LETTER_PRIME_DICT[letter], letter_id)

     # Random 30-letter string: the ID is the product over its letters.
     word = random_string(30, ascii_lowercase)
     word_id = anagram_generator.get_id(word)
     expected_id = 1
     for ch in word:
         expected_id = expected_id * LETTER_PRIME_DICT[ch]
     self.assertEqual(expected_id, word_id)
Example #4
0
def test_join_random(seed, lt):
    """Join a randomly generated main frame against a random keyed frame.

    A set of unique keys of logical type `lt` (with a randomly chosen
    stype) is paired with the values 0..nkeys-1 in a frame keyed on "KEY".
    The main frame holds `ndata` keys drawn at random from that set; after
    the join its "VAL" column must contain the value paired with each key.
    """
    random.seed(seed)
    ndata = int(random.expovariate(0.0005))
    nkeys = int(random.expovariate(0.01)) + 1
    st = random.choice(lt.stypes)

    # Build a list of distinct keys appropriate for the logical type.
    if lt == ltype.bool:
        keys = [True, False]
    elif lt == ltype.int:
        # Narrower integer stypes get fewer random bits.
        if st == stype.int8:
            nbits = 6
        elif st == stype.int16:
            nbits = 12
        else:
            nbits = 24
        keys = list({random.getrandbits(nbits) for _ in range(nkeys)})
    elif lt == ltype.real:
        keys = [random.random() for _ in range(nkeys)]
        if st == stype.float32:
            # Round-trip through a float32 frame before de-duplicating, so
            # uniqueness is judged on the values the frame will hold.
            keys = dt.Frame(keys, stype=st).topython()[0]
        keys = list(set(keys))
    else:
        key_len = int(random.expovariate(0.05)) + 1
        keys = list({random_string(key_len) for _ in range(nkeys)})
    nkeys = len(keys)

    dkey = dt.Frame(KEY=keys, VAL=range(nkeys), stypes={"KEY": st})
    dkey.key = "KEY"
    # Re-read keys and values from the keyed frame before using them.
    keys, vals = dkey.topython()
    main = [random.choice(keys) for _ in range(ndata)]
    dmain = dt.Frame(KEY=main, stype=st)
    # Expected joined values: look each main key up in the key column.
    res = [vals[keys.index(key)] for key in main]

    djoined = dmain[:, :, join(dkey)]
    djoined.internal.check()
    assert djoined.shape == (ndata, 2)
    assert djoined.names == ("KEY", "VAL")
    assert djoined.topython() == [main, res]
Example #5
0
def generate_str_column(allparams):
    """
    Generate and return a column with random string data.

    Reads ``allparams["nrows"]`` (number of values to produce) and
    ``allparams["quote"]`` (string placed on both sides of every value when
    the always-quote mode is selected, which happens with probability 0.2).

    Only one generation mode is currently reachable: values come from
    ``random_string`` with lengths drawn from ``random.expovariate(0.01)``.
    The column is regenerated until at least one non-empty value fails to
    parse as a float, so the result is never fully numeric. When ``nrows``
    is not positive an empty list is returned.
    """
    nrows = allparams["nrows"]
    quote = allparams["quote"]
    always_quote = random.random() < 0.2
    if always_quote:
        def rr(x):
            return quote + x + quote
    else:
        def rr(x):
            return x

    # The mode is still drawn although a single mode exists, which keeps the
    # sequence of random numbers consumed by this function unchanged.
    rmode = random.random()
    if rmode < 0:
        # Placeholder for further modes; random.random() is never negative,
        # so this branch is unreachable.
        pass
    else:
        # Generate simple alphanumeric strings and make sure
        # the resulting column is not fully populated with numeric values.
        is_numeric = nrows > 0
        col = []
        while is_numeric:
            col = [
                rr(random_string(int(random.expovariate(0.01))))
                for _ in range(nrows)
            ]
            for row in col:
                if not row:
                    # Empty values do not count as non-numeric.
                    continue
                try:
                    float(row)
                except ValueError:
                    # Only a failed parse marks the column as non-numeric;
                    # KeyboardInterrupt and the like must propagate.
                    is_numeric = False
                    break
        return col
Example #6
0
def test_re_match_random(seed):
    """Compare f.A.re_match on random strings with Python's re.fullmatch.

    A random pattern is assembled from ".", a letter from "abcdefgh", ".*"
    and "\\w" until its accumulated weight reaches `k`; the frame result
    must equal the full-match result computed per string with `re`.
    """
    random.seed(seed)
    n = int(random.expovariate(0.001) + 100)
    k = random.randint(2, 12)

    pieces = []
    weight = 0
    while weight < k:
        t = random.random()
        if t < 0.4:
            pieces.append(".")
            weight += 1
        elif t < 0.6:
            pieces.append(random.choice("abcdefgh"))
            weight += 1
        elif t < 0.8:
            # ".*" counts four times as much as any other piece.
            pieces.append(".*")
            weight += 4
        else:
            pieces.append("\\w")
            weight += 1
    random_rx = re.compile("".join(pieces))

    src = [random_string(k) for _ in range(n)]
    frame = dt.Frame(A=src)
    frame_res = frame[:, f.A.re_match(random_rx)]
    assert frame_res.shape == (n, 1)

    # Reference answer: does the whole string match the pattern?
    res = [random_rx.fullmatch(s) is not None for s in src]
    dtres = frame_res.to_list()[0]
    assert res == dtres
Example #7
0
def test_match_random(seed):
    """Compare match(f.A, rx) on random strings with Python's re.fullmatch.

    The pattern is built from random pieces (".", one of "abcdefgh", ".*",
    "\\w") until its accumulated weight reaches `k`. The resulting frame
    must equal a frame of per-string full-match booleans computed with `re`.
    """
    random.seed(seed)
    n = int(random.expovariate(0.001) + 100)
    k = random.randint(2, 12)

    pattern_parts = []
    pattern_weight = 0
    while pattern_weight < k:
        t = random.random()
        if t < 0.4:
            part = "."
        elif t < 0.6:
            part = random.choice("abcdefgh")
        elif t < 0.8:
            part = ".*"
            # The wildcard run is weighted more heavily than other pieces.
            pattern_weight += 3
        else:
            part = "\\w"
        pattern_parts.append(part)
        pattern_weight += 1
    random_rx = re.compile("".join(pattern_parts))

    src = [random_string(k) for _ in range(n)]
    DT = dt.Frame(A=src)
    res = DT[:, match(f.A, random_rx)]
    expected = dt.Frame(A=[random_rx.fullmatch(s) is not None for s in src])
    assert_equals(res, expected)
Example #8
0
 def test_recursive_ispalindrome(self):
     """Check recursive_ispalindrome on a built palindrome and on 'cat'."""
     # Mirror a random 10-letter half to obtain a guaranteed palindrome.
     half = random_string(10, string.ascii_lowercase)
     mirrored = half + half[::-1]
     is_palindrome = recursive_palindrome.recursive_ispalindrome(mirrored)
     self.assertTrue(is_palindrome)

     # A fixed word that reads differently backwards must be rejected.
     is_palindrome = recursive_palindrome.recursive_ispalindrome('cat')
     self.assertFalse(is_palindrome)
    def test_add_keys_to_dict(self):
        """Check foreign_chart.add_keys_to_dict on empty and seeded input."""
        # Starting from an empty dictionary, every ASCII lowercase letter
        # must end up as a key.
        filled = foreign_chart.add_keys_to_dict({})
        for letter in string.ascii_lowercase:
            self.assertIn(letter, filled)

        # Starting from a dictionary that already has one letter mapped to
        # an empty list, the result must equal EMPTY_LETTER_DICT.
        existing_letter = random_string(1, string.ascii_lowercase)
        seeded = {existing_letter: []}
        result = foreign_chart.add_keys_to_dict(seeded)
        self.assertDictEqual(result, EMPTY_LETTER_DICT)
Example #10
0
def generate_str_column(allparams):
    """
    Build a list of ``allparams["nrows"]`` random strings.

    Each value comes from ``random_string`` with a length drawn from
    ``random.expovariate(0.01)``. With probability 0.2 every value is
    wrapped in ``allparams["quote"]`` on both sides. A generation mode is
    drawn as well, but only the simple-string mode is reachable.
    """
    row_count = allparams["nrows"]
    quote_char = allparams["quote"]
    quoted = random.random() < 0.2

    def wrap(text):
        # Surround the value with the quote string only in quoted mode.
        if quoted:
            return quote_char + text + quote_char
        return text

    mode = random.random()
    if mode < 0:
        # No other mode is implemented; random.random() is never negative.
        return None

    # Generate simple alphanumeric strings
    column = []
    for _ in range(row_count):
        length = int(random.expovariate(0.01))
        column.append(wrap(random_string(length)))
    return column
Example #11
0
def test_empty_strings(seed, repl):
    """Round-trip string columns containing `repl` placeholders through CSV.

    Each cell is a random 8-character string with probability `p`, and
    `repl` otherwise. The frame is written with to_csv(), read back with
    dt.fread(), and must keep its names, stypes and values.
    """
    # TODO: also test repl=None, which currently gets deserialized into empty
    # strings.
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    random.seed(seed)
    ncols = random.randint(3, 10)
    nrows = int(random.expovariate(1 / 200) + 1)
    p = random.uniform(0.1, 0.5)

    src = []
    for _ in range(ncols):
        column = [(random_string(8) if random.random() < p else repl)
                  for _ in range(nrows)]
        # A column made up solely of `repl` gets one real string in its
        # first cell.
        if column == [repl] * nrows:
            column[0] = "!!!"
        src.append(column)

    colnames = [ch.upper() for ch in alphabet[:ncols]]
    d0 = dt.Frame(src, names=colnames)
    assert d0.names == tuple(colnames)
    assert d0.ltypes == (ltype.str, ) * ncols

    d1 = dt.fread(d0.to_csv())
    frame_integrity_check(d1)
    assert d1.names == d0.names
    assert d1.stypes == d0.stypes
    assert d1.to_list() == src
Example #12
0
 def random_str():
     """Return None with probability 0.1, else a random_string value.

     The argument passed to random_string is drawn uniformly from 1..20.
     """
     if random.random() >= 0.1:
         return random_string(random.randint(1, 20))
     return None