Example #1
0
def test_word_as_list():
    '''
    Check mw._aslist, which converts a word to a list of letters while
    keeping each digraph together as a single element.
    '''
    # without digraphs the result must be identical to the builtin list()
    plain = 'mana'
    assert mw._aslist(plain) == list(plain)

    # with a digraph the result must differ from a plain character split
    with_digraph = 'ngutu'
    assert mw._aslist(with_digraph) != list(with_digraph)

    # exact expected splits, including mixed case and an embedded space
    expected_splits = (
        ('ngutu', ['ng', 'u', 't', 'u']),
        ('whakangaro', ['wh', 'a', 'k', 'a', 'ng', 'a', 'r', 'o']),
        ('WhĀnGa', ['Wh', 'Ā', 'nG', 'a']),
        ('awe awe', list('awe awe')),
    )
    for text, letters in expected_splits:
        assert mw._aslist(text) == letters
Example #2
0
def get_all_children_counts():
    '''
    Count the children of every eligible nine-character word form, once
    for each distinct single letter the word contains.

    The word forms are read from "all_words_for_iwa.json" in the directory
    configured as 'iwa_path' for the current computer. Words listed in
    nines_to_exclude are skipped.

    Returns a dict mapping (word, letter) to the number of children that
    get_children(word, letter) finds.
    '''
    # get the word list
    cf = config.ConfigFile()
    json_path = cf.configfile[cf.computername]['iwa_path']
    json_filename = "all_words_for_iwa.json"
    full_json_path = json_path + json_filename
    with open(full_json_path, 'r') as f:
        unique_word_forms = json.load(f)

    # nine-character word forms that have not been explicitly excluded
    nines = [x for x in unique_word_forms
             if len(x) == 9 and x not in nines_to_exclude]

    all_children = {}
    for word in nines:
        # digraphs come back as 2-character elements and are skipped here:
        # only single letters are used as the compulsory letter
        single_letters = {x for x in mw._aslist(word) if len(x) == 1}
        for letter in single_letters:
            all_children[(word, letter)] = len(get_children(word, letter))

    # previously the counts were computed and then discarded
    return all_children
Example #3
0
def get_children(input_string, compulsory_letter, minimum_length=3):
    '''
    Returns a list containing all the word forms (children)
    that can be made from the input_string.

    The input string can be of one of two forms
    a) A Māori word
    b) A Koru

    If the latter then any digraphs on the last row need to be reversed.

    input_string      -- the word or koru supplying the available letters
    compulsory_letter -- a letter (or digraph) every child must contain
    minimum_length    -- minimum number of characters in a child; anything
                         accepted by int() is allowed (ValueError otherwise)

    A word form is a child when it is at least minimum_length characters
    long, contains compulsory_letter, and uses no letter more often than
    it occurs in input_string (digraphs count as one letter).
    '''

    # if minimum length is passed as a string *try* and convert to integer
    minimum_length = int(minimum_length)

    # if the input string contains a reversed digraph, reverse it back
    # note that this can only happen with a koru on the last line
    # the two positions are mutually exclusive, so at most one pair is
    # swapped; the length guard lets inputs shorter than 7 characters
    # through instead of raising IndexError
    for right, left in ((6, 5), (5, 4)):
        if (len(input_string) > right and
                input_string[right] + input_string[left] in pū.digraphs):
            letters = list(input_string)
            letters[right], letters[left] = letters[left], letters[right]
            input_string = ''.join(letters)
            break

    input_string_as_list = mw._aslist(input_string)

    # get the word list
    db_access_info = pg_utils.get_db_access_info()
    connection = psycopg2.connect(database=db_access_info[0],
                                  user=db_access_info[1],
                                  password=db_access_info[2])
    try:
        # 'with connection' only ends the transaction, it does not close
        # the connection, hence the explicit close in the finally clause
        with connection:
            with connection.cursor() as cursor:
                all_word_forms_query = "SELECT * FROM pgt_word"
                cursor.execute(all_word_forms_query)
                unique_word_forms = cursor.fetchall()  # list of tuples
    finally:
        connection.close()

    # the letters on offer never change, so count them once
    available_letters = Counter(input_string_as_list)

    children = []
    for row in unique_word_forms:
        word = ''.join(row)  # each row is a tuple of strings
        if len(word) < minimum_length:
            continue

        word_as_list = mw._aslist(word)
        if compulsory_letter not in word_as_list:
            continue

        # an empty difference means no letter is used more often than
        # the input string provides it
        if not (Counter(word_as_list) - available_letters):
            children.append(word)

    return children
Example #4
0
# The koru is a list of 9 squares. The square tables below imply this
# layout (a clockwise spiral that ends in the centre):
#
#     0 1 2
#     7 8 3
#     6 5 4
#
# A digraph occupies a pair of squares (first letter, second letter).
# Every table is keyed by the pair of squares holding the first digraph.

# squares for the 2nd digraph: directly above or below the first
_KORU_SECOND_DIGRAPH_SQUARES = {(0, 1): (6, 5), (6, 5): (0, 1),
                                (1, 2): (5, 4), (5, 4): (1, 2)}
# the *empty square on digraph row*
_KORU_EMPTY_ON_DIGRAPH_ROW = {(0, 1): 2, (1, 2): 0, (6, 5): 4, (5, 4): 6}
# the *empty square underneath or above the digraph*
_KORU_EMPTY_UNDER_OR_ABOVE = {(0, 1): 7, (1, 2): 3, (6, 5): 7, (5, 4): 3}
# the *empty square in the middle column*
_KORU_EMPTY_MIDDLE_COLUMN = {(0, 1): 5, (1, 2): 5, (6, 5): 1, (5, 4): 1}
# the *empty square in the middle row*
_KORU_EMPTY_MIDDLE_ROW = {(0, 1): 3, (1, 2): 7, (6, 5): 3, (5, 4): 7}
# the *isolated empty square* (the other 2 are vertically together)
_KORU_ISOLATED_EMPTY = {(0, 1): 6, (1, 2): 4, (6, 5): 0, (5, 4): 2}


def _koru_draw(single_letters, allowed=None):
    '''
    Remove and return one randomly chosen letter from single_letters.

    If allowed is given, only letters contained in it are candidates.
    Raises IndexError (from random.choice) when there is no candidate.
    '''
    if allowed is None:
        candidates = single_letters
    else:
        candidates = [x for x in single_letters if x in allowed]
    letter = random.choice(candidates)
    single_letters.remove(letter)
    return letter


def _koru_draw_consonant_else_vowel(single_letters):
    '''Draw a random consonant, or a random vowel if none remain.'''
    try:
        return _koru_draw(single_letters, pū.consonants)
    except IndexError:
        # no consonants remaining
        return _koru_draw(single_letters, pū.all_vowels)


def _koru_fill_empty_squares(koru, single_letters):
    '''Fill every square that is still None with a random letter.'''
    for index, square in enumerate(koru):
        if square is None:
            koru[index] = _koru_draw(single_letters)


def get_koru(seed_word, centre_letter=None):
    '''
    Returns a koru (a string of 9 characters) built by randomly arranging
    the letters of seed_word.

    seed_word     -- the word to arrange; with its digraphs split into two
                     characters each it has to fill all 9 squares
    centre_letter -- the single letter for the centre square (the last
                     character of the result); it is assumed to occur in
                     the word. If None, a random single letter is chosen.

    The two letters of a digraph are always placed side by side on the
    top or bottom row. On the bottom row the first letter goes in the
    higher numbered square, so the digraph reads reversed in the returned
    string. The remaining letters are placed under constraints on where
    vowels and consonants may go.

    Raises ValueError if seed_word is in nines_to_exclude or contains
    more than 2 digraphs.
    '''
    if seed_word in nines_to_exclude:
        # this used to *return* the ValueError class rather than raise
        raise ValueError(
            '{!r} is excluded from being a seed word'.format(seed_word))

    koru = [None] * 9  # initialise koru

    # listify the seed word and split into single letters and digraphs
    seed_word_as_list = mw._aslist(seed_word)

    # 'single_letters' and 'digraphs' will be emptied
    single_letters = [x for x in seed_word_as_list if
                      x in pū.all_single_letters]
    digraphs = [x for x in seed_word_as_list if x in pū.digraphs]
    digraphs_count = len(digraphs)

    if digraphs_count > 2:
        # no placement rules exist for this case; previously it left
        # squares empty and failed with a TypeError when joining
        raise ValueError(
            '{!r} has more than 2 digraphs'.format(seed_word))

    if centre_letter is None:
        # randomly select 'centre letter'
        centre_letter = random.choice(single_letters)

    # remove it from single letters
    single_letters.remove(centre_letter)

    # add 'centre letter' to koru, 8 squares remaining to be filled
    koru[8] = centre_letter

    # randomly select and randomly place the digraphs (if any)
    # note there are constraints to the randomness
    if digraphs:
        # establish the squares where it is ok to put digraphs
        if centre_letter in pū.duals_left:  # w or n
            # avoid 'vertical digraphs'
            ok_digraph_squares = [(1, 2), (5, 4)]
        elif centre_letter in pū.duals_right:  # h
            # avoid 'vertical digraphs'
            ok_digraph_squares = [(0, 1), (6, 5)]
        else:
            # 'vertical digraphs' not an issue
            ok_digraph_squares = [(0, 1), (1, 2), (5, 4), (6, 5)]

        # randomly select 'digraph1' and remove it from digraphs
        digraph1 = random.choice(digraphs)
        digraphs.remove(digraph1)

        # randomly select the squares for 'digraph1'
        digraph1_squares = random.choice(ok_digraph_squares)

        # add 'digraph1' to koru
        koru[digraph1_squares[0]] = digraph1[0]  # 7 squares remaining
        koru[digraph1_squares[1]] = digraph1[1]  # 6 squares remaining

        if digraphs_count == 2:
            # only 1 digraph is left, but it is still drawn with
            # random.choice so that the sequence of random calls (and so
            # any seeded result) stays exactly as it was
            digraph2 = random.choice(digraphs)
            digraphs.remove(digraph2)

            # place the 2nd digraph directly above or below the first
            digraph2_squares = _KORU_SECOND_DIGRAPH_SQUARES[digraph1_squares]

            # add 'digraph2' to koru
            koru[digraph2_squares[0]] = digraph2[0]  # 5 squares remaining
            koru[digraph2_squares[1]] = digraph2[1]  # 4 squares remaining
    # End of Placing Digraphs

    # place the rest of the single letters (aside from that in the centre)
    if digraphs_count == 2:
        # 4 squares remaining
        # we may have 1 consonant to place in a specific position
        remaining_consonant = [x for x in single_letters if
                               x in pū.consonants]
        if remaining_consonant:
            if digraph1_squares in ((0, 1), (6, 5)):
                koru[3] = remaining_consonant[0]
            else:
                koru[7] = remaining_consonant[0]
            single_letters.remove(remaining_consonant[0])

        # randomly assign the remaining single letters
        # (should all be vowels)
        # 3 or 4 remaining
        _koru_fill_empty_squares(koru, single_letters)
    # DONE for 2 digraphs

    if digraphs_count == 1:
        # 6 letters remaining to place, of which at least 4 will be vowels

        # these squares must each have a vowel in it, filled in this order:
        # the *empty square on digraph row*, the *empty square underneath
        # or above the digraph*, the *empty square in the middle column*
        for squares in (_KORU_EMPTY_ON_DIGRAPH_ROW,
                        _KORU_EMPTY_UNDER_OR_ABOVE,
                        _KORU_EMPTY_MIDDLE_COLUMN):
            letter_to_place = _koru_draw(single_letters, pū.all_vowels)
            koru[squares[digraph1_squares]] = letter_to_place
        # 3 squares remaining

        empty_square_middle_row = _KORU_EMPTY_MIDDLE_ROW[digraph1_squares]

        if centre_letter in pū.consonants:
            # the *empty square in the middle row* must have a vowel in it
            letter_to_place = _koru_draw(single_letters, pū.all_vowels)
            koru[empty_square_middle_row] = letter_to_place
            # 2 squares remaining

            # fill the remaining 2 squares with whatever letters remain
            _koru_fill_empty_squares(koru, single_letters)

        if centre_letter in pū.all_vowels:
            # 3 squares remaining (1 isolated and 2 vertically together)
            # 3C or 2C, 1V or 1C, 2V

            # mostly these will be 3 consonants and we just want to ensure
            # that we don't create any 'vertical digraphs'
            # and we want to keep vowels and consonants separate

            # the *isolated empty square* must have a 'w' in it if we have
            # one otherwise a consonant
            if 'w' in single_letters:
                letter_to_place = 'w'
                single_letters.remove(letter_to_place)
            else:
                letter_to_place = _koru_draw(single_letters, pū.consonants)
            koru[_KORU_ISOLATED_EMPTY[digraph1_squares]] = letter_to_place
            # 2 squares remaining

            # the *empty square in the middle row* must have a consonant in
            # it if we have one otherwise a vowel.
            letter_to_place = _koru_draw_consonant_else_vowel(single_letters)
            koru[empty_square_middle_row] = letter_to_place
            # 1 square remaining

            # fill the remaining square with whatever letter remains
            _koru_fill_empty_squares(koru, single_letters)
    # DONE for 1 digraph

    if digraphs_count == 0:
        # no digraphs, so only the centre letter has been placed
        # we have 8 letters left to place, walking the outer squares in
        # order and alternating between vowels and consonants
        vowel_first = centre_letter in pū.all_vowels
        for index in range(8):
            is_even_square = index % 2 == 0  # 0, 2, 4, 6
            if is_even_square == vowel_first:
                # even squares when the centre is a vowel,
                # odd squares when it is not
                letter_to_place = _koru_draw(single_letters, pū.all_vowels)
            else:
                letter_to_place = _koru_draw_consonant_else_vowel(
                    single_letters)
            koru[index] = letter_to_place

    return ''.join(koru)
Example #5
0
def test_pangakupu_words():
    '''
    Regression test for the word forms stored in the pgt_word table.

    Pins the number of word forms, their distribution by length, their
    uniqueness, that they are lower case and made only of legal letters,
    that each survives a round trip through mw.MaoriWord, and the count
    of every letter (digraphs counted as one letter).
    '''
    db_access_info = pg_utils.get_db_access_info()
    connection = psycopg2.connect(database=db_access_info[0],
                                  user=db_access_info[1],
                                  password=db_access_info[2])
    try:
        # 'with connection' only ends the transaction, it does not close
        # the connection, so close it even when the query fails
        with connection:
            with connection.cursor() as cursor:
                all_word_forms_query = "SELECT * FROM pgt_word"
                cursor.execute(all_word_forms_query)
                unique_word_forms = cursor.fetchall()  # list of tuples
    finally:
        connection.close()

    # list of strings
    all_words_for_iwa = [''.join(x) for x in unique_word_forms]

    # word counts
    assert len(all_words_for_iwa) == 11601
    c = Counter(len(x) for x in all_words_for_iwa)
    assert dict(c) == {1: 9,
                       2: 57,
                       3: 255,
                       4: 1099,
                       5: 1169,
                       6: 2691,
                       7: 1568,
                       8: 1949,
                       9: 830,
                       10: 971,
                       11: 451,
                       12: 279,
                       13: 164,
                       14: 54,
                       15: 35,
                       16: 10,
                       17: 6,
                       18: 3,
                       19: 1}

    assert sum(c.values()) == 11601  # recheck the count
    assert sum(k * v for k, v in c.items()) == 83080  # letter counts
    assert len(set(all_words_for_iwa)) == 11601  # test for uniqueness

    # check every entry is lower case
    # (the offending words are reported when the assertion fails)
    not_lower_case = [x for x in all_words_for_iwa if x.lower() != x]
    assert not not_lower_case, not_lower_case

    # check every entry is free of punctuation
    has_illegal_letters = [x for x in all_words_for_iwa
                           if not mw._isalllegalletters(x)]
    assert not has_illegal_letters, has_illegal_letters

    # check that the basics for all maori words hold
    for x in all_words_for_iwa:
        assert x == mw.MaoriWord(x).word

    # letter counts
    all_letters_for_iwa = []
    for x in all_words_for_iwa:
        all_letters_for_iwa.extend(mw._aslist(x))
    c = dict(Counter(all_letters_for_iwa))
    assert c == {'a': 14894,
                 'ā': 2252,
                 'e': 5125,
                 'ē': 281,
                 'h': 3970,
                 'i': 6765,
                 'ī': 627,
                 'k': 6882,
                 'm': 2406,
                 'n': 2002,
                 'ng': 1834,
                 'o': 5521,
                 'ō': 1216,
                 'p': 3733,
                 'r': 6270,
                 't': 5880,
                 'u': 5736,
                 'ū': 993,
                 'w': 1245,
                 'wh': 1807}

    assert sum(c.values()) == 79439  # digraphs count as 1 letter

    # cross check letter counts from words vs direct letter counts
    assert 83080 == 79439 + c['ng'] + c['wh']  # digraphs count as 2 letters