def create_file_analysis_all_group(pre_path):
    """Write a group-by-file matrix of "times" values to analysis_all_group.txt.

    Rows follow the lines of ``all_group.txt``; columns follow the files of
    the ``divide_groups_in_label`` folder, in the order the folder listing
    returns them. Each cell is the ``"times"`` entry of whatever
    ``lib.find_obj_by_field`` returns for that group (searched on the
    ``"group_id"`` field), or 0 when the lookup result is falsy.

    Args:
        pre_path: Path prefix; it is concatenated directly with the file and
            folder names, so it must already end with a separator.
    """
    print('---------create_file_analysis_all_group---------')
    group_ids = read.line_in_file_rstrip(path_file=pre_path + 'all_group.txt')
    label_dir = pre_path + 'divide_groups_in_label'
    # Load every JSON file once up front so the lookups below do no file I/O.
    per_file_objects = [
        read.get_list_obj_in_file_json(path_file=label_dir + '/' + file_name)
        for file_name in read.name_file_in_folder(path_folfer=label_dir)
    ]

    def times_in(objects, group_id):
        # A group missing from a file contributes 0 to that column.
        match = lib.find_obj_by_field(L=objects,
                                      target=group_id,
                                      field="group_id")
        return match["times"] if match else 0

    matrix = [
        [times_in(objects, group_id) for objects in per_file_objects]
        for group_id in group_ids
    ]
    write.matrix(path_file=pre_path + 'analysis_all_group.txt',
                 list_data=matrix)
def create_data(pre_path):
    """Translate group ids into labels for every file in ``groups_in``.

    For each file, every line (a sequence of group ids) becomes the sorted
    list of ``"label"`` values of the entries that ``lib.find_obj_by_field``
    finds in ``group_label.json`` (searched on the ``"group_id"`` field).
    Groups whose lookup result is falsy are dropped. Lines that end up with
    no label are left out of the output and counted instead. The result is
    written under ``get_label_for_groups/`` with the same file name, and the
    number of skipped lines for that file is printed.

    Args:
        pre_path: Path prefix; it is concatenated directly with the file and
            folder names, so it must already end with a separator.
    """
    print('---------create_data_get_label_for_groups---------')
    list_name_file = read.name_file_in_folder(path_folfer=pre_path +
                                              'groups_in')
    list_group_label = read.get_list_obj_in_file_json(path_file=pre_path +
                                                      'group_label.json')
    for name_file in list_name_file:
        list_data = read.elements_in_each_line_of_file(
            path=pre_path + 'groups_in/' + name_file)
        # Reset per file: the message below reports the count "in <file>",
        # so it must not carry over the skipped lines of earlier files.
        count_num_lines_no_guess = 0
        new_file = []
        for line in list_data:
            new_line = []
            for group in line:
                label = lib.find_obj_by_field(L=list_group_label,
                                              target=group,
                                              field="group_id")
                if label:
                    new_line.append(label["label"])
            new_line.sort()
            if new_line:
                new_file.append(new_line)
            else:
                count_num_lines_no_guess += 1
        write.w_space_w_in_line(path_file=pre_path + 'get_label_for_groups/' +
                                name_file,
                                list_data=new_file)
        print("Quantity lines no guess in " + name_file + ": " +
              str(count_num_lines_no_guess))
Beispiel #3
0
def create_file_all_group(pre_path):
    """Gather every "group_id" under ``divide_groups_in_label`` into all_group.txt.

    Each line of each file in the folder is parsed as a JSON object and its
    ``"group_id"`` value collected. The collected ids are sorted, passed
    through ``lib.distinct`` and written with ``write.each_line_by_path``.

    Args:
        pre_path: Path prefix; it is concatenated directly with the file and
            folder names, so it must already end with a separator.
    """
    print('---------create_file_all_group---------')
    label_dir = pre_path + 'divide_groups_in_label'
    group_ids = []
    for file_name in read.name_file_in_folder(path_folfer=label_dir):
        raw_lines = read.line_in_file(path_file=label_dir + '/' + file_name)
        group_ids.extend(json.loads(raw)["group_id"] for raw in raw_lines)
    # lib.distinct is handed a sorted list, exactly as in the in-place version.
    unique_ids = lib.distinct(sorted(group_ids))
    write.each_line_by_path(path_file=pre_path + 'all_group.txt',
                            list_data=unique_ids)
Beispiel #4
0
def read_list_percent():
    """Load the training rows and their age-bracket labels.

    Every file listed in ``path_percent_train`` is read with
    ``read.matrix_percent``. All rows of the file at position ``k`` in the
    folder listing receive the ``k``-th label of the fixed label list below,
    so the listing order decides which label a file gets.

    Returns:
        A ``(rows, labels)`` tuple of two parallel lists.

    Raises:
        IndexError: If the folder lists more files than there are labels.
    """
    age_labels = [
        '__label__18-24', '__label__25-34', '__label__35-44', '__label__45-54',
        '__label__55+'
    ]
    samples = []
    targets = []
    file_names = read.name_file_in_folder(path_folfer=path_percent_train)
    for position, file_name in enumerate(file_names):
        # path_percent_train is joined to the name without adding a separator.
        rows = read.matrix_percent(path=path_percent_train + file_name)
        label = age_labels[position]
        for row in rows:
            samples.append(row)
            targets.append(label)
    return samples, targets
Beispiel #5
0
def create_data(pre_path):
    """Turn each labelled line into a five-slot vector of smoothed proportions.

    For every file in ``get_label_for_groups`` and every line in it, the
    vector starts with ``1 / (len(line) + 5)`` in each of the five label
    slots. For each entry returned by ``lib.analysis_list_sorted(line)``,
    the slot chosen by ``lib.find_index`` for the entry's ``"label"`` is set
    to ``(entry["times"] + 1) / (len(line) + 5)``. The per-file matrix is
    written under ``percent/`` with the same file name.

    Args:
        pre_path: Path prefix; it is concatenated directly with the file and
            folder names, so it must already end with a separator.
    """
    print('---------create_data_percent---------')
    source_dir = pre_path + 'get_label_for_groups'
    file_names = read.name_file_in_folder(path_folfer=source_dir)
    age_labels = [
        "__label__18-24", "__label__25-34", "__label__35-44", "__label__45-54",
        "__label__55+"
    ]
    slot_count = len(age_labels)
    for file_name in file_names:
        rows = read.elements_in_each_line_of_file(
            path=source_dir + '/' + file_name)
        print(file_name)
        matrix = []
        for row in rows:
            label_counts = lib.analysis_list_sorted(row)
            # Add-one smoothing: one pseudo-count per slot in the denominator.
            denominator = len(row) + slot_count
            vector = [1 / denominator] * slot_count
            for entry in label_counts:
                # NOTE(review): assumes lib.find_index always returns a valid
                # slot for the label; confirm what it returns when not found.
                slot = lib.find_index(age_labels, entry["label"])
                vector[slot] = (entry["times"] + 1) / denominator
            matrix.append(vector)
        write.matrix(path_file=pre_path + 'percent/' + file_name,
                     list_data=matrix)