# Check if the given column values are normal! if len(columns) > col_key and len(columns) > col_value: the_key = columns[col_key].strip() the_value = columns[col_value].strip() # Add the new pair in the dict. If the key is already # existing, it will be updated with the new value! my_dict[the_key] = the_value else: print("Warning. Bad number of columns for line: {0}".format(line)) return my_dict
# NOTE(review): the line above is the whitespace-collapsed end of
# extract_dict_from_lines(); its header is not visible from here, so it is
# kept verbatim rather than re-indented by guesswork.

# ----------------------------------------------------------------------------

if __name__ == '__main__':

    # Load the content of the file named by my_file.
    table_lines = read_file(my_file)

    # Nothing was read: tell the user and stop here with exit status 0.
    if len(table_lines) == 0:
        print('Hum... the file was empty!')
        sys.exit(0)

    # Build the dictionary from the lines, using the column indexes 1 and 2
    # (presumably key column then value column - confirm against the
    # signature of extract_dict_from_lines).
    sampa_dict = extract_dict_from_lines(table_lines, 1, 2)

    # A few phonemes to look up in the dictionary.
    sampa_phones = ['a', 'b', 'c', 'd', 'e', 'f', 'E', 'g', 'a~', 'S']

    for phone in sampa_phones:
        # Unknown phoneme: report it and move on to the next one.
        if phone not in sampa_dict:
            print("Sampa phoneme {:s} has no IPA!".format(phone))
            continue

        ipa = sampa_dict[phone]
        print("Sampa phoneme {:s} is IPA {:s}.".format(phone, ipa))
for d in documents: if item in d: dw += 1.0 if dw == 0.0: return 0.0 return tf * (math.log(D / dw)) # --------------------------------------------------------------------------- # Main program # --------------------------------------------------------------------------- if __name__ == '__main__': phones1 = read_file(corpus1) phones2 = read_file(corpus2) counter1 = collections.Counter(phones1) counter2 = collections.Counter(phones2) # Hapax hapax1 = [k for k in counter1.keys() if counter1[k] == 1] hapax2 = [k for k in counter2.keys() if counter2[k] == 1] print("Corpus 1, Number of hapax: {:d}.".format(len(hapax1))) print("Corpus 2, Number of hapax: {:d}.".format(len(hapax2))) # Zipf law ranks1 = get_ranks(counter1) ranks2 = get_ranks(counter2) for t in ['@', 'e', "E"]: