def create_file_test_percent():
    """Build a smoothed label-probability vector for every line of the test
    label file and write the resulting matrix to 'test_percent.txt'.

    Lines with no predicted labels (first element empty) get a random
    vector from lib.random_vector_percent() and are counted as "no guess".
    """
    print('---------create_file_test_percent---------')
    label_lines = read.elements_in_each_line_of_file(
        path=main_path + 'get_label_for_groups.txt')
    model_labels = [
        "__label__18-24",
        "__label__25-34",
        "__label__35-44",
        "__label__45-54",
        "__label__55+",
    ]
    matrix = []
    no_guess = 0
    for labels in label_lines:
        if labels[0] != '':
            counted = lib.analysis_list_sorted(labels)
            # Laplace-style smoothing: every bucket starts at 1/(n+5),
            # observed labels get (times+1)/(n+5).
            denom = 5 + len(labels)
            probs = [1 / denom for _ in range(5)]
            for entry in counted:
                slot = lib.find_index(model_labels, entry["label"])
                probs[slot] = (entry["times"] + 1) / denom
        else:
            # No label information at all — fall back to a random vector.
            probs = lib.random_vector_percent()
            no_guess += 1
        matrix.append(probs)
    write.matrix(path_file=main_path + 'test_percent.txt', list_data=matrix)
    print('Số lượng chưa đoán được trong tập train/tổng số test case')
    print(f"{no_guess}/{len(matrix)}")
def create_file_in_folder_label(pre_path):
    """Group consecutive rows of 'maintrain_sort.txt' by their first field
    (the label) and write one JSON file of group counts per label into
    'divide_groups_in_label/'.

    Assumes the input file is already sorted by label (produced by
    sort_file_main_train), so equal labels are adjacent.
    """
    print('---------create_file_in_folder_label---------')
    rows = read.elements_in_each_line_of_file(pre_path + 'maintrain_sort.txt')
    total = len(rows)
    start = 0
    while start < total:
        label = rows[start][0]
        members = []
        # Consume the contiguous run of rows sharing this label,
        # collecting every field after the label itself.
        end = start
        while end < total and rows[end][0] == label:
            members.extend(rows[end][1:])
            end += 1
        start = end
        members.sort()
        counted = lib.count_times_of_group(members)
        write.file_json(file_path=pre_path + 'divide_groups_in_label/',
                        name_file=label,
                        list_object=counted)
def create_data(pre_path):
    """For every file under 'groups_in/', translate each group id to its
    label (via 'group_label.json') and write the sorted label lines to
    'get_label_for_groups/<same name>'. Lines where no group could be
    mapped are dropped and counted per file.

    NOTE(review): another function named `create_data` is defined later in
    this module; at import time the later definition shadows this one —
    confirm which is intended to be callable, and rename one of them.
    """
    print('---------create_data_get_label_for_groups---------')
    list_name_file = read.name_file_in_folder(path_folfer=pre_path + 'groups_in')
    list_group_label = read.get_list_obj_in_file_json(path_file=pre_path + 'group_label.json')
    for name_file in list_name_file:
        # Bug fix: reset the counter for each file. Previously it was
        # initialized once before this loop, so the per-file message
        # printed a cumulative total for every file after the first.
        count_num_lines_no_guess = 0
        list_data = read.elements_in_each_line_of_file(
            path=pre_path + 'groups_in/' + name_file)
        new_file = []
        for line in list_data:
            new_line = []
            for group in line:
                label = lib.find_obj_by_field(L=list_group_label,
                                              target=group,
                                              field="group_id")
                if label:
                    new_line.append(label["label"])
            new_line.sort()
            if new_line:
                new_file.append(new_line)
            else:
                # No group in this line could be mapped to a label.
                count_num_lines_no_guess += 1
        write.w_space_w_in_line(path_file=pre_path + 'get_label_for_groups/' + name_file,
                                list_data=new_file)
        print("Quantity lines no guess in " + name_file + ": " + str(count_num_lines_no_guess))
def sort_file_main_train(pre_path):
    """Sort 'maintrain.txt' by label (via the module-level get_label key)
    and write the result to 'maintrain_sort.txt'."""
    print('---------sort_file_main_train---------')
    rows = read.elements_in_each_line_of_file(path=pre_path + 'maintrain.txt')
    ordered = sorted(rows, key=get_label)
    write.file_main_train(path_file=pre_path + 'maintrain_sort.txt',
                          list_data=ordered)
def read_file_agedetector_group():
    """Strip the leading field from every line of 'agedetector_group.txt'
    and write the remaining group ids to 'main_test.txt'."""
    rows = read.elements_in_each_line_of_file(
        path=main_path + 'agedetector_group.txt')
    # Keep everything after the first field of each row.
    groups_only = [row[1:] for row in rows]
    write.w_space_w_in_line(path_file=main_path + 'main_test.txt',
                            list_data=groups_only)
def create_all_groups():
    """Flatten every group id in 'main_test.txt', deduplicate them
    (lib.distinct) and write the result to 'test_all_group.txt'."""
    print('---------create_all_groups---------')
    lines = read.elements_in_each_line_of_file(path=main_path + 'main_test.txt')
    all_groups = [group for line in lines for group in line]
    all_groups.sort()
    print(len(all_groups))
    unique_groups = lib.distinct(all_groups)
    write.each_line_by_path(path_file=main_path + 'test_all_group.txt',
                            list_data=unique_groups)
def create_file_get_label_for_groups():
    """Translate each group id in 'main_test.txt' to its label (looked up
    in 'group_label.json') and write the sorted label lines to
    'get_label_for_groups.txt'. Lines with no match are kept as empty
    lines so the output stays aligned with the input.
    """
    print('---------create_file_get_label_for_groups---------')
    test_lines = read.elements_in_each_line_of_file(path=main_path + 'main_test.txt')
    group_labels = read.get_list_obj_in_file_json(path_file=main_path + 'group_label.json')
    labelled_lines = []
    for line in test_lines:
        found = []
        for group_id in line:
            match = lib.find_obj_by_field(L=group_labels,
                                          target=group_id,
                                          field="group_id")
            if match:
                found.append(match["label"])
        found.sort()
        labelled_lines.append(found)
    write.w_space_w_in_line(path_file=main_path + 'get_label_for_groups.txt',
                            list_data=labelled_lines)
def create_data_for_groups_in(pre_path):
    """Split the label-sorted 'maintrain_sort.txt' into one file per label
    under 'groups_in/', each file holding that label's group lines
    (label column removed).

    Assumes the input is sorted by label so equal labels are adjacent.
    """
    print('---------create_data_for_groups_in---------')
    rows = read.elements_in_each_line_of_file(path=pre_path + 'maintrain_sort.txt')
    total = len(rows)
    start = 0
    while start < total:
        label = rows[start][0]
        bucket = []
        # Collect the contiguous run of rows carrying this label,
        # dropping the label column from each.
        end = start
        while end < total and rows[end][0] == label:
            bucket.append(rows[end][1:])
            end += 1
        start = end
        write.w_space_w_in_line(pre_path + 'groups_in/' + label + '.txt', bucket)
        print("Quantity lines in " + str(label) + ": " + str(len(bucket)))
def create_data(pre_path):
    """For every file in 'get_label_for_groups/', turn each label line
    into a smoothed 5-bucket probability vector and write the matrix to
    'percent/<same name>'.

    NOTE(review): this redefines `create_data` (an earlier definition
    exists in this module); this later one wins at import time — confirm
    that is intended, and consider renaming one of them.
    """
    print('---------create_data_percent---------')
    model_labels = [
        "__label__18-24",
        "__label__25-34",
        "__label__35-44",
        "__label__45-54",
        "__label__55+",
    ]
    for file_name in read.name_file_in_folder(path_folfer=pre_path + 'get_label_for_groups'):
        lines = read.elements_in_each_line_of_file(
            path=pre_path + 'get_label_for_groups/' + file_name)
        print(file_name)
        matrix = []
        for line in lines:
            counted = lib.analysis_list_sorted(line)
            # Laplace-style smoothing: every bucket starts at 1/(n+5),
            # observed labels get (times+1)/(n+5).
            denom = len(line) + 5
            vector = [1 / denom] * 5
            for entry in counted:
                slot = lib.find_index(model_labels, entry["label"])
                vector[slot] = (entry["times"] + 1) / denom
            matrix.append(vector)
        write.matrix(path_file=pre_path + 'percent/' + file_name, list_data=matrix)