Example #1
0
def test_str_to_list():
    table = htable(5)
    htable_put(table, "parrt", [2, 99, 3942])
    htable_put(table, "tombu", [6, 3, 1024, 99, 102342])
    assert htable_str(
        table) == "{tombu:[6, 3, 1024, 99, 102342], parrt:[2, 99, 3942]}"
    assert htable_buckets_str(table) == """0000->
Example #2
0
def myhtable_create_index(files):
    """
    Build an index from word to set of document indexes
    This does the exact same thing as create_index() except that it uses
    your htable.  As a number of htable buckets, use 4011.
    Returns a list-of-buckets hashtable representation.

    files is iterated in order; the position of each entry (starting at 0)
    is the document index recorded for the words found in that entry.
    """
    NBUCKETS = 4011
    table = htable(NBUCKETS)
    for idx, fname in enumerate(files):
        s_content = get_text(fname)  # text content for this file name
        lst_word = words(s_content)  # list of normalized words
        for word in lst_word:
            set_IDs = htable_get(table, word)
            if set_IDs is None:
                # No entry for this word yet: start its set with this index.
                htable_put(table, word, {idx})
            else:
                # No second put is issued here.
                # NOTE(review): assumes htable_get returns the stored set
                # object itself, so add() updates the table in place - confirm.
                set_IDs.add(idx)
    return table
Example #3
0
def test_a_few():
    table = htable(5)
    for i in range(1, 11):
        htable_put(table, i, i)
    s = htable_str(table)
    assert s == "{5:5, 10:10, 1:1, 6:6, 2:2, 7:7, 3:3, 8:8, 4:4, 9:9}"
    s = htable_buckets_str(table)
    assert s == """0000->5:5, 10:10
Example #4
0
def test_str_to_set():
    table = htable(5)
    htable_put(table, "parrt", {2, 99, 3942})
    htable_put(table, "tombu", {6, 3, 1024, 99, 102342})
    assert htable_str(
        table
    ) == "{tombu:set([1024, 3, 99, 6, 102342]), parrt:set([2, 99, 3942])}"
    assert htable_buckets_str(table) == """0000->
Example #5
0
def test_int_to_int():
    table = htable(5)
    for i in range(1, 11):
        htable_put(table, i, i)
    s = htable_str(table)
    assert s=="{5:5, 10:10, 1:1, 6:6, 2:2, 7:7, 3:3, 8:8, 4:4, 9:9}"
    s = htable_buckets_str(table)
    assert s == """0000->5:5, 10:10
def myhtable_create_index(files):
    """
    Build an htable with 4011 buckets keyed by the words found in files.

    Files are visited in order.  For each one, every distinct word of its
    text is put into the table with the file's zero-based position in
    files as the value.

    NOTE(review): the value handed to htable_put is a bare int, not a set.
    Whether positions from earlier files survive a later put on the same
    word depends on htable_put, which is not visible here - confirm.
    """
    index = htable(4011)
    for position, path in enumerate(files):
        # set() collapses repeated words so each is put once per file.
        for term in set(words(get_text(path))):
            htable_put(index, term, position)
    return index
Example #7
0
def test_str_to_str():
    table = htable(5)
    htable_put(table, "a", "x")
    htable_put(table, "b", "y")
    htable_put(table, "c", "z")
    htable_put(table, "f", "i")
    htable_put(table, "g", "j")
    htable_put(table, "k", "k")
    s = htable_str(table)
    assert s == '{a:x, f:i, k:k, b:y, g:j, c:z}', "found " + s
    s = htable_buckets_str(table)
    assert s == """0000->
Example #8
0
def test_str_to_str():
    table = htable(5)
    htable_put(table, "a", "x")
    htable_put(table, "b", "y")
    htable_put(table, "c", "z")
    htable_put(table, "f", "i")
    htable_put(table, "g", "j")
    htable_put(table, "k", "k")
    s = htable_str(table)
    assert s=='{a:x, f:i, k:k, b:y, g:j, c:z}', "found "+s
    s = htable_buckets_str(table)
    assert s == """0000->
Example #9
0
def test_replace_str():
    table = htable(5)
    htable_put(table, "a", "x")
    htable_put(table, "b", "y")
    htable_put(table, "a", "z")
    htable_put(table, "a", "i")
    htable_put(table, "g", "j")
    htable_put(table, "g", "k")
    s = htable_str(table)
    assert s == '{a:i, b:y, g:k}', "found " + s
    s = htable_buckets_str(table)
    assert s == """0000->
Example #10
0
def test_replace_str():
    table = htable(5)
    htable_put(table, "a", "x")
    htable_put(table, "b", "y")
    htable_put(table, "a", "z")
    htable_put(table, "a", "i")
    htable_put(table, "g", "j")
    htable_put(table, "g", "k")
    s = htable_str(table)
    assert s == '{a:i, b:y, g:k}', "found " + s
    s = htable_buckets_str(table)
    assert s == """0000->
Example #11
0
def myhtable_create_index(files):
    """
    Build an index from word to set of document indexes
    This does the exact same thing as create_index() except that it uses
    your htable.  As a number of htable buckets, use 4011.
    Returns a list-of-buckets hashtable representation.

    Every word occurrence is put with a fresh one-element set holding the
    position of its file in files.
    """
    table = htable(4011)
    for doc_id, path in enumerate(files):
        for term in words(get_text(path)):
            # The table is rebound to whatever htable_put hands back.
            # NOTE(review): this only works if htable_put returns the
            # table, and sets only accumulate if it merges - confirm.
            table = htable_put(table, term, {doc_id})

    return table
def myhtable_create_index(files):
    """
    Build an index from word to a set of the files entries containing it.
    This does the same job as create_index() except that it uses
    your htable.  As a number of htable buckets, use 4011.
    Returns a list-of-buckets hashtable representation.

    The value stored for a word holds the files entry itself (not its
    position in files).

    NOTE(review): a fresh one-element set is put for every word
    occurrence, so accumulation across files depends on how htable_put
    treats an existing key, which is not visible here - confirm.
    """
    d = htable(4011)  # initialize empty htable
    for file in files:  # loop through files
        for word in words(get_text(file)):  # loop through words in that file
            htable_put(d, word, {file})
    return d
Example #13
0
def myhtable_create_index(files):
    """
    Build an index from word to set of document indexes
    This does the exact same thing as create_index() except that it uses
    your htable.  As a number of htable buckets, use 4011.
    Returns a list-of-buckets hashtable representation.

    Works in two passes over the word lists of all files: the first puts
    an empty set for every word, the second adds each file's position to
    the set fetched for each of its words.
    """
    # Word lists for all files are computed once and reused by both passes.
    per_file_words = [words(get_text(path)) for path in files]
    table = htable(4011)

    # Pass 1: put an empty set for every word occurrence.
    for doc_words in per_file_words:
        for term in doc_words:
            htable_put(table, term, set())

    # Pass 2: record the file position in the set fetched for each word.
    for doc_id, doc_words in enumerate(per_file_words):
        for term in doc_words:
            htable_get(table, term).add(doc_id)
    return table
Example #14
0
def myhtable_create_index(files):
    """
    Build an index from word to set of document indexes
    This does the exact same thing as create_index() except that it uses
    your htable.  As a number of htable buckets, use 4011.
    Returns a list-of-buckets hashtable representation, or None when
    files is empty.
    """
    if len(files) < 1:
        return None

    index = htable(4011)
    for doc_id, path in enumerate(files):
        for term in words(get_text(path)):
            # A one-element set is put for every occurrence.  Per the
            # original author's note, htable_put is expected to merge a
            # set value into the set already stored under an existing key.
            # NOTE(review): htable_put is not visible here - confirm.
            htable_put(index, term, {doc_id})
    return index
Example #15
0
def test_empty():
    table = htable(5)
    assert htable_str(table) == "{}"
    assert htable_buckets_str(table) == """0000->
Example #16
0
def test_str_to_set():
    table = htable(5)
    htable_put(table, "parrt", {2, 99, 3942})
    htable_put(table, "tombu", {6, 3, 1024, 99, 102342})
    assert htable_str(table) == "{tombu:set([1024, 3, 99, 6, 102342]), parrt:set([2, 99, 3942])}"
    assert htable_buckets_str(table) == """0000->
Example #17
0
def test_str_to_list():
    table = htable(5)
    htable_put(table, "parrt", [2, 99, 3942])
    htable_put(table, "tombu", [6, 3, 1024, 99, 102342])
    assert htable_str(table) == "{tombu:[6, 3, 1024, 99, 102342], parrt:[2, 99, 3942]}"
    assert htable_buckets_str(table) == """0000->
Example #18
0
def test_singleon():
    table = htable(5)
    htable_put(table, "parrt", set([99]))
    assert htable_str(table) == "{parrt:set([99])}"
    assert htable_buckets_str(table) == """0000->
Example #19
0
def test_empty():
    table = htable(5)
    assert htable_str(table) == "{}"
    assert htable_buckets_str(table) == """0000->
Example #20
0
def test_singleton():
    table = htable(5)
    htable_put(table, "parrt", set([99]))
    assert htable_str(table) == "{parrt:{99}}"
    assert htable_buckets_str(table) == """0000->
Example #21
0
def test_single():
    table = htable(5)
    htable_put(table, "parrt", 99)
    assert htable_str(table) == "{parrt:99}"
    assert htable_buckets_str(table) == """0000->
Example #22
0
def test_single():
    table = htable(5)
    htable_put(table, "parrt", 99)
    assert htable_str(table) == "{parrt:99}"
    assert htable_buckets_str(table) == """0000->