def create_file_test_percent():
    """Build one score vector per test line and write them to a file.

    Reads ``get_label_for_groups.txt`` under ``main_path``. A line whose
    first element is not the empty string gets a vector with one slot per
    entry of ``list_model``: every slot starts at ``1 / (len(line) + 5)``
    and the slot of each entry returned by ``lib.analysis_list_sorted`` is
    set to ``(entry["times"] + 1) / (len(line) + 5)``. Any other line gets
    the vector returned by ``lib.random_vector_percent()`` and is counted.
    The matrix is written to ``test_percent.txt`` and the count of such
    lines is printed against the total number of lines.
    """
    print('---------create_file_test_percent---------')
    rows = read.elements_in_each_line_of_file(
        path=main_path + 'get_label_for_groups.txt')
    list_model = [
        "__label__18-24", "__label__25-34", "__label__35-44", "__label__45-54",
        "__label__55+"
    ]
    num_labels = len(list_model)
    matrix = []
    unguessed = 0
    for line in rows:
        if line[0] == '':
            # Nothing to analyse on this line: use the helper's fallback vector.
            scores = lib.random_vector_percent()
            unguessed += 1
        else:
            label_counts = lib.analysis_list_sorted(line)
            denominator = len(line) + num_labels
            # Every label starts from the same baseline share.
            scores = [1 / denominator] * num_labels
            for entry in label_counts:
                position = lib.find_index(list_model, entry["label"])
                scores[position] = (entry["times"] + 1) / denominator
        matrix.append(scores)

    write.matrix(path_file=main_path + 'test_percent.txt', list_data=matrix)
    print('Số lượng chưa đoán được trong tập train/tổng số test case')
    print(str(unguessed) + "/" + str(len(matrix)))
Esempio n. 2
0
def create_file_in_folder_label(pre_path):
    """Write one JSON file per run of rows sharing the same first element.

    Reads ``maintrain_sort.txt`` under ``pre_path``. For each run of
    consecutive rows with an equal first element, the remaining elements of
    those rows are pooled into one list, sorted, passed through
    ``lib.count_times_of_group`` and handed to ``write.file_json`` with the
    shared first element as the file name, targeting
    ``divide_groups_in_label/``.

    Args:
        pre_path: Prefix prepended to every path used here.
    """
    print('---------create_file_in_folder_label---------')
    rows = read.elements_in_each_line_of_file(
        pre_path + 'maintrain_sort.txt')
    total = len(rows)
    start = 0
    while start < total:
        value_check = rows[start][0]
        list_group = []
        end = start
        # Consume every consecutive row that carries the same first element.
        while end < total and value_check == rows[end][0]:
            list_group.extend(rows[end][1:])
            end += 1
        start = end
        list_group.sort()
        standard_list = lib.count_times_of_group(list_group)
        write.file_json(file_path=pre_path + 'divide_groups_in_label/',
                        name_file=value_check,
                        list_object=standard_list)
def create_data(pre_path):
    """Translate the groups in every ``groups_in`` file into labels.

    For each file listed in ``<pre_path>groups_in``, every group on a line is
    looked up in ``group_label.json`` by its ``group_id`` field through
    ``lib.find_obj_by_field``; groups with a truthy match contribute the
    match's ``"label"``. Each line's labels are sorted, and lines that end up
    with at least one label are written to
    ``<pre_path>get_label_for_groups/<file name>``. Lines with no label are
    dropped and counted, and the count is printed per file.

    Args:
        pre_path: Prefix prepended to every path used here.
    """
    print('---------create_data_get_label_for_groups---------')
    list_name_file = read.name_file_in_folder(path_folfer=pre_path +
                                              'groups_in')
    list_group_label = read.get_list_obj_in_file_json(path_file=pre_path +
                                                      'group_label.json')
    for name_file in list_name_file:
        list_data = read.elements_in_each_line_of_file(
            path=pre_path + 'groups_in/' + name_file)
        # Reset for every file: the message below reports the number of
        # unlabelled lines "in <name_file>", so the counter must not carry
        # over the totals of previously processed files.
        count_num_lines_no_guess = 0
        new_file = []
        for line in list_data:
            new_line = []
            for group in line:
                label = lib.find_obj_by_field(L=list_group_label,
                                              target=group,
                                              field="group_id")
                if label:
                    new_line.append(label["label"])
            if new_line:
                new_line.sort()
                new_file.append(new_line)
            else:
                count_num_lines_no_guess += 1
        write.w_space_w_in_line(path_file=pre_path + 'get_label_for_groups/' +
                                name_file,
                                list_data=new_file)
        print("Quantity lines no guess in " + name_file + ": " +
              str(count_num_lines_no_guess))
def sort_file_main_train(pre_path):
    """Sort the rows of ``maintrain.txt`` and save them as ``maintrain_sort.txt``.

    Rows are ordered with ``get_label`` as the sort key and written through
    ``write.file_main_train``.

    Args:
        pre_path: Prefix prepended to both file names.
    """
    print('---------sort_file_main_train---------')
    rows = read.elements_in_each_line_of_file(path=pre_path +
                                              'maintrain.txt')
    ordered_rows = sorted(rows, key=get_label)
    write.file_main_train(path_file=pre_path + 'maintrain_sort.txt',
                          list_data=ordered_rows)
def read_file_agedetector_group():
    """Copy ``agedetector_group.txt`` to ``main_test.txt`` minus each first element.

    Both files live under ``main_path``. Every line read from the input has
    its first element removed; the remaining elements are written through
    ``write.w_space_w_in_line``.
    """
    records = read.elements_in_each_line_of_file(
        path=main_path + 'agedetector_group.txt')
    groups_per_line = [record[1:] for record in records]
    write.w_space_w_in_line(path_file=main_path + 'main_test.txt',
                            list_data=groups_per_line)
def create_all_groups():
    """Collect every group found in ``main_test.txt`` into ``test_all_group.txt``.

    All elements of all lines are flattened into one sorted list, whose
    length is printed. The list is then passed through ``lib.distinct`` and
    the result is written with ``write.each_line_by_path``. Both files live
    under ``main_path``.
    """
    print('---------create_all_groups---------')
    rows = read.elements_in_each_line_of_file(path=main_path +
                                              'main_test.txt')
    every_group = sorted(group for row in rows for group in row)
    print(len(every_group))
    unique_groups = lib.distinct(every_group)
    write.each_line_by_path(path_file=main_path + 'test_all_group.txt',
                            list_data=unique_groups)
def create_file_get_label_for_groups():
    """Replace the groups on each line of ``main_test.txt`` with their labels.

    Every group is looked up in ``group_label.json`` by its ``group_id``
    field through ``lib.find_obj_by_field``; groups with a truthy match
    contribute the match's ``"label"``. Each line's labels are sorted and
    every line is kept, including those left without labels. The result is
    written to ``get_label_for_groups.txt``. All files live under
    ``main_path``.
    """
    print('---------create_file_get_label_for_groups---------')
    rows = read.elements_in_each_line_of_file(path=main_path +
                                              'main_test.txt')
    list_group_label = read.get_list_obj_in_file_json(path_file=main_path +
                                                      'group_label.json')
    labelled_rows = []
    for row in rows:
        # Lazily look up each group in order; falsy results are skipped below.
        matches = (lib.find_obj_by_field(L=list_group_label,
                                         target=group,
                                         field="group_id")
                   for group in row)
        labelled_rows.append(sorted(match["label"] for match in matches
                                    if match))
    write.w_space_w_in_line(path_file=main_path + 'get_label_for_groups.txt',
                            list_data=labelled_rows)
Esempio n. 8
0
def create_data_for_groups_in(pre_path):
    """Split ``maintrain_sort.txt`` into one ``groups_in`` file per first element.

    Each run of consecutive rows sharing the same first element is written,
    with that first element removed from every row, to
    ``<pre_path>groups_in/<first element>.txt``. The number of rows in the
    run is printed afterwards.

    Args:
        pre_path: Prefix prepended to every path used here.
    """
    print('---------create_data_for_groups_in---------')
    rows = read.elements_in_each_line_of_file(path=pre_path + 'maintrain_sort.txt')
    total = len(rows)
    start = 0
    while start < total:
        value_check = rows[start][0]
        # Advance to the first row whose leading element differs.
        end = start
        while end < total and value_check == rows[end][0]:
            end += 1
        list_group = [row[1:] for row in rows[start:end]]
        start = end
        write.w_space_w_in_line(pre_path + 'groups_in/' + value_check + '.txt', list_group)
        print("Quantity lines in " + str(value_check) + ": " + str(len(list_group)))
Esempio n. 9
0
def create_data(pre_path):
    """Turn every ``get_label_for_groups`` file into a matrix of score vectors.

    For each file listed in ``<pre_path>get_label_for_groups``, every line
    yields a vector with one slot per entry of ``list_model``: each slot
    starts at ``1 / (len(line) + 5)`` and the slot of every entry returned by
    ``lib.analysis_list_sorted`` is set to
    ``(entry["times"] + 1) / (len(line) + 5)``. The vectors of a file are
    written to ``<pre_path>percent/<file name>``.

    Args:
        pre_path: Prefix prepended to every path used here.
    """
    print('---------create_data_percent---------')
    list_file_name = read.name_file_in_folder(path_folfer=pre_path +
                                              'get_label_for_groups')
    list_model = [
        "__label__18-24", "__label__25-34", "__label__35-44", "__label__45-54",
        "__label__55+"
    ]
    num_labels = len(list_model)
    for file_name in list_file_name:
        rows = read.elements_in_each_line_of_file(
            path=pre_path + 'get_label_for_groups/' + file_name)
        print(file_name)
        matrix = []
        for row in rows:
            label_counts = lib.analysis_list_sorted(row)
            denominator = len(row) + num_labels
            # Every label starts from the same baseline share.
            scores = [1 / denominator] * num_labels
            for entry in label_counts:
                position = lib.find_index(list_model, entry["label"])
                scores[position] = (entry["times"] + 1) / denominator
            matrix.append(scores)
        write.matrix(path_file=pre_path + 'percent/' + file_name,
                     list_data=matrix)