Beispiel #1
0
        # Check if the given column values are normal!
        if len(columns) > col_key and len(columns) > col_value:
            the_key = columns[col_key].strip()
            the_value = columns[col_value].strip()
            # Add the new pair in the dict. If the key is already
            # existing, it will be updated with the new value!
            my_dict[the_key] = the_value
        else:
            print("Warning. Bad number of columns for line: {0}".format(line))

    return my_dict

# ----------------------------------------------------------------------------

if __name__ == '__main__':

    # Load the content of the file designated by my_file.
    lines = read_file(my_file)

    # Guard: with nothing to process, say so and leave with exit status 0.
    if len(lines) == 0:
        print('Hum... the file was empty!')
        sys.exit(0)

    # Build the lookup table from the lines.
    # NOTE(review): 1 and 2 are presumably the key/value column indexes
    # expected by extract_dict_from_lines -- confirm against its signature.
    sampa_dict = extract_dict_from_lines(lines, 1, 2)

    # Report, for each phoneme of this fixed list, the value stored in the
    # table, or the fact that the table has no entry for it.
    my_list = ['a', 'b', 'c', 'd', 'e', 'f', 'E', 'g', 'a~', 'S']
    for phone in my_list:
        if phone not in sampa_dict:
            print("Sampa phoneme {:s} has no IPA!".format(phone))
            continue
        print("Sampa phoneme {:s} is IPA {:s}.".format(phone, sampa_dict[phone]))
Beispiel #2
0
    for d in documents:
        if item in d:
            dw += 1.0
    if dw == 0.0:
        return 0.0

    return tf * (math.log(D / dw))


# ---------------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------------

if __name__ == '__main__':

    phones1 = read_file(corpus1)
    phones2 = read_file(corpus2)

    counter1 = collections.Counter(phones1)
    counter2 = collections.Counter(phones2)

    # Hapax
    hapax1 = [k for k in counter1.keys() if counter1[k] == 1]
    hapax2 = [k for k in counter2.keys() if counter2[k] == 1]
    print("Corpus 1, Number of hapax: {:d}.".format(len(hapax1)))
    print("Corpus 2, Number of hapax: {:d}.".format(len(hapax2)))

    # Zipf law
    ranks1 = get_ranks(counter1)
    ranks2 = get_ranks(counter2)
    for t in ['@', 'e', "E"]: