def get_data_from_file():
    """Load a random 'ocq' dataset and reduce it to a single category.

    Files are drawn at random from the directory returned by
    ``get_json_file_path()`` until one is classified as ``'ocq'`` by
    ``get_data_type`` and has more than three entries in ``data['o0']``.
    One category index is then picked at random; only the records of that
    category are kept, their ``'c0'`` key is removed and their ``'id'`` is
    set to their ``'o0'`` value. The dataset is relabelled as type ``'oq'``.

    NOTE(review): another function with this same name appears later in
    this file; if both live at module level the later one replaces this one.

    Returns:
        The reduced dataset after ``add_color(single=True)``,
        ``add_small_value`` and ``add_small_random`` have been applied.
    """
    json_path = get_json_file_path()
    json_choice = os.listdir(json_path)
    while True:
        json_filename = json_choice[numpy.random.randint(0, len(json_choice))]
        json_fileurl = os.path.join(json_path, json_filename)
        # The loop may open many files before one qualifies, so close each
        # handle deterministically instead of leaking it.
        with open(json_fileurl) as f:
            data = json.load(f)
        data_type = get_data_type(data)
        if data_type == 'ocq' and len(data['o0']) > 3:
            break
    print(data)

    cat_number = len(data['c0'])
    choose_cat = numpy.random.randint(cat_number)

    new_data_array = []
    for datum in data['data_array']:
        if datum['c0'] == choose_cat:
            # The kept records are edited in place, as before.
            del datum['c0']
            datum['id'] = datum['o0']
            new_data_array.append(datum)

    # The title must be built before the category names are deleted.
    data['title'] = f'Value of {data["c0"][choose_cat]}'
    del data['c0']
    data['data_array'] = new_data_array
    data['type'] = 'oq'

    data = add_color(data, single=True)
    data = add_small_value(data)
    data = add_small_random(data)
    return data
def get_simple_OQ():
    """Build a small synthetic ordinal/quantitative dataset.

    Between 3 and 10 records are created. The ordinal labels in
    ``data['o0']`` are consecutive integers starting at 2010, and every
    record gets a random integer ``'q0'`` in ``[20, 100)`` with ``'o0'`` and
    ``'id'`` both equal to its position.

    Returns:
        The dataset after ``add_color(single=True)`` and
        ``change_oq_order`` have been applied.
    """
    data_number = numpy.random.randint(3, 11)
    data = {
        'title': 'Value',
        'o0': [2010 + i for i in range(data_number)],
        'data_array': [],
    }
    for i in range(data_number):
        data['data_array'].append({
            'o0': i,
            'q0': numpy.random.randint(20, 100),
            'id': i,
        })
    data['unit'] = ''

    # Every other caller in this file rebinds the result of add_color, so do
    # the same here rather than relying on in-place mutation.
    data = add_color(data, single=True)
    data = change_oq_order(data)
    return data
def get_from_special_file(json_filename='0070.json'):
    """Load one named JSON dataset and run the standard preparation steps.

    Args:
        json_filename: File name inside the directory returned by
            ``get_json_file_path()``.

    Returns:
        The loaded dataset after ``add_random_by_cat``, ``change_cat_order``,
        ``add_small_random(data, 0)``, ``add_small_value``, ``del_long_name``,
        ``add_color``, ``add_type`` and ``add_vis_type`` have been applied,
        in that order.
    """
    json_path = get_json_file_path()
    json_fileurl = os.path.join(json_path, json_filename)
    # Close the handle as soon as the JSON is parsed.
    with open(json_fileurl) as f:
        data = json.load(f)

    data = add_random_by_cat(data)
    data = change_cat_order(data)
    data = add_small_random(data, 0)
    data = add_small_value(data)
    data = del_long_name(data)
    data = add_color(data)
    data = add_type(data)
    data = add_vis_type(data)
    return data
def get_read_CCQ(json_path="../data_collect_system/json/Ielts_new_principle/ielts_data/"):
    """Pick random JSON files until one yields a usable two-category dataset.

    Each candidate is loaded, coloured with ``add_color`` and converted with
    ``convert_ocq_ccq``. Candidates whose ``'c0'`` or ``'c1'`` list has
    exactly one entry are rejected and another file is drawn.

    Args:
        json_path: Directory that is listed for candidate files.

    Returns:
        The first accepted dataset. With a 5-in-10 chance it is passed
        through ``change_ccq_cat_order``, and it is always passed through
        ``change_order_ccq``, ``add_random_errors`` and ``del_long_name``.
    """
    candidates = os.listdir(json_path)
    while True:
        picked = candidates[numpy.random.randint(0, len(candidates))]
        file_path = os.path.join(json_path, picked)
        with open(file_path) as handle:
            data = json.load(handle)
        data = add_color(data)
        data = convert_ocq_ccq(data)

        # Only datasets with more than one choice on both axes are usable.
        if len(data['c0']) != 1 and len(data['c1']) != 1:
            if numpy.random.randint(0, 10) < 5:
                data = change_ccq_cat_order(data)
            data = change_order_ccq(data)
            data = add_random_errors(data)
            return del_long_name(data)
def get_data_from_file():
    """Load a random 'ocq' JSON dataset and run the standard preparation steps.

    Files are drawn at random from the directory returned by
    ``get_json_file_path()`` until ``get_data_type`` classifies one as
    ``'ocq'``.

    NOTE(review): an earlier function in this file has the same name; if
    both live at module level this later definition is the one in effect.

    Returns:
        The loaded dataset after ``add_random_by_cat``, ``change_cat_order``,
        ``add_small_random(data, 0)``, ``add_small_value``, ``del_long_name``,
        ``add_color``, ``add_type`` and ``add_vis_type`` have been applied,
        in that order.
    """
    json_path = get_json_file_path()
    json_choice = os.listdir(json_path)
    while True:
        json_filename = json_choice[numpy.random.randint(0, len(json_choice))]
        json_fileurl = os.path.join(json_path, json_filename)
        # Several files may be tried before one matches, so close each
        # handle right after parsing instead of leaking it.
        with open(json_fileurl) as f:
            data = json.load(f)
        data_type = get_data_type(data)
        if data_type == 'ocq':
            break

    data = add_random_by_cat(data)
    data = change_cat_order(data)
    data = add_small_random(data, 0)
    data = add_small_value(data)
    data = del_long_name(data)
    data = add_color(data)
    data = add_type(data)
    data = add_vis_type(data)
    return data
def generate_a_special_prominent(rule_type,
                                 vis_type='load_group_bar_chart',
                                 ordinal_min=4,
                                 ordinal_max=10,
                                 category_min=2,
                                 category_max=6,
                                 main_dim='c0',
                                 second_dim='o0'):
    """Generate a random grouped dataset containing one local outlier.

    Every category receives a begin/end pair from ``get_begin_end_value``
    using one shared random trend, and ``interpolate_data`` fills in the
    series. For each category returned by
    ``get_special_list(category_num, max_num=1)`` one interior value is
    replaced by ``special_ratio`` times its larger neighbour (ratio > 1) or
    its smaller neighbour (otherwise).

    Args:
        rule_type: Not read by this function.
        vis_type: Stored under ``data['vis_type']``.
        ordinal_min: Lower bound passed to ``randint`` for the ordinal count.
        ordinal_max: Upper bound passed to ``randint`` for the ordinal count.
        category_min: Lower bound passed to ``randint`` for the category count.
        category_max: Upper bound passed to ``randint`` for the category count.
        main_dim: Accepted but ignored; ``'c0'`` is always written.
        second_dim: Accepted but ignored; ``'o0'`` is always written.

    Returns:
        The result of ``generate_pack_data`` after ``add_color`` and
        ``add_small_value``, with ``'vis_type'``, ``'major_name'``,
        ``'second_name'`` and ``'pre_gen_focus'`` set. ``'pre_gen_focus'``
        holds an ``'all_trend'`` and a ``'local_trend'`` sentence, plus a
        ``'compare_ave'`` sentence when ``judge_big_diff`` exceeds 0.1.
    """
    # Redraw the grid size until it has fewer than 30 cells.
    ordinal_num = numpy.random.randint(ordinal_min, ordinal_max)
    category_num = numpy.random.randint(category_min, category_max)
    while ordinal_num * category_num >= 30:
        ordinal_num = numpy.random.randint(ordinal_min, ordinal_max)
        category_num = numpy.random.randint(category_min, category_max)
    last_ord = ordinal_num - 1

    # Unused value; the draw is kept so the random sequence stays the same.
    begin_num = numpy.random.random()

    raw_starts = [numpy.random.uniform(0.1, 1) for _ in range(category_num)]
    # Normalize so that the largest start value is 1.
    peak = max(raw_starts)
    cat_begin_value = [value / peak for value in raw_starts]
    small_cat, big_cat = get_big_small_index(cat_begin_value)

    normal_trend = numpy.random.uniform(-10, 10)

    data_content = []
    focus_coor = []
    for cat, start in enumerate(cat_begin_value):
        focus_coor.append([cat, 0])
        focus_coor.append([cat, last_ord])
        first, final = get_begin_end_value(start, normal_trend)
        data_content.append({0: first, last_ord: final})

    sentences = [{
        'focus_id': [
            change_coordinate_to_id(coor, ordinal_num) for coor in focus_coor
        ],
        'compare_id': [],
        'type': 'all_trend'
    }]

    data_full = interpolate_data(data_content)
    special_list = get_special_list(category_num, max_num=1)

    special_ord = numpy.random.randint(1, ordinal_num - 1)
    special_ratio = get_special_ratio()
    local_ids = []
    for cat in range(category_num):
        if cat not in special_list:
            continue
        row = data_full[cat]
        pick = max if special_ratio > 1 else min
        row[special_ord] = pick(row[special_ord - 1],
                                row[special_ord + 1]) * special_ratio

        # Second differences at the modified point and at its two
        # neighbours; a neighbour on the series edge counts as 0.
        bend_here = abs(row[special_ord + 1] + row[special_ord - 1] -
                        2 * row[special_ord])
        bend_prev = 0 if special_ord == 1 else abs(
            row[special_ord] + row[special_ord - 2] -
            2 * row[special_ord - 1])
        bend_next = 0 if special_ord == ordinal_num - 2 else abs(
            row[special_ord] + row[special_ord + 2] -
            2 * row[special_ord + 1])

        # Re-centre the highlighted window on the sharpest bend.
        if bend_prev > bend_next and bend_prev > bend_here:
            special_ord -= 1
        elif bend_next > bend_prev and bend_next > bend_here:
            special_ord += 1

        for ordinal in (special_ord - 1, special_ord, special_ord + 1):
            local_ids.append(
                change_coordinate_to_id([cat, ordinal], ordinal_num))

    sentences.append({
        'focus_id': list(set(local_ids)),
        'compare_id': [],
        'type': 'local_trend'
    })

    if judge_big_diff(data_full, small_cat, big_cat) > 0.1:
        small_ids = []
        big_ids = []
        for cat in range(category_num):
            if cat in small_cat:
                bucket = small_ids
            elif cat in big_cat:
                bucket = big_ids
            else:
                continue
            for ordinal in range(ordinal_num):
                bucket.append(
                    change_coordinate_to_id([cat, ordinal], ordinal_num))
        sentences.append({
            'focus_id': small_ids,
            'compare_id': big_ids,
            'type': 'compare_ave'
        })

    data = generate_pack_data(data_full)
    data = add_color(data)
    data = add_small_value(data)
    data['vis_type'] = vis_type
    # The dimension names are fixed here regardless of the arguments.
    data['major_name'] = 'c0'
    data['second_name'] = 'o0'
    data['pre_gen_focus'] = sentences
    return data
def generate_single_complex_data(rule_type,
                                 vis_type='load_group_bar_chart',
                                 ordinal_min=3,
                                 ordinal_max=10,
                                 category_min=2,
                                 category_max=6,
                                 main_dim='c0',
                                 second_dim='o0'):
    """Generate a random grouped dataset where one category bends mid-series.

    Categories returned by ``get_special_list(category_num, max_num=1)`` get
    three anchor points (start, a random interior index, end), with the
    interior value pushed above the larger or below the smaller of the two
    ends. All other categories get a start and an end that follows one
    shared linear trend. ``interpolate_data`` fills in the rest.

    Args:
        rule_type: Not read by this function.
        vis_type: Stored under ``data['vis_type']``.
        ordinal_min: Lower bound passed to ``randint`` for the ordinal count.
        ordinal_max: Upper bound passed to ``randint`` for the ordinal count.
        category_min: Lower bound passed to ``randint`` for the category count.
        category_max: Upper bound passed to ``randint`` for the category count.
        main_dim: Not read; ``'c0'`` is always written.
        second_dim: Not read; ``'o0'`` is always written.

    Returns:
        The result of ``generate_pack_data`` after ``add_color`` and
        ``add_small_value``, with ``'vis_type'``, ``'major_name'``,
        ``'second_name'`` and ``'pre_gen_focus'`` set. ``'pre_gen_focus'``
        holds a ``'compare_trend'`` sentence, plus a ``'compare_ave'``
        sentence when ``judge_big_diff`` exceeds 0.1.
    """
    # Redraw the grid size until it has fewer than 30 cells.
    ordinal_num = numpy.random.randint(ordinal_min, ordinal_max)
    category_num = numpy.random.randint(category_min, category_max)
    while ordinal_num * category_num >= 30:
        ordinal_num = numpy.random.randint(ordinal_min, ordinal_max)
        category_num = numpy.random.randint(category_min, category_max)
    last_ord = ordinal_num - 1

    # Unused value; the draw is kept so the random sequence stays the same.
    begin_num = numpy.random.random()

    raw_starts = [numpy.random.uniform(0.1, 1) for _ in range(category_num)]
    # Normalize so that the largest start value is 1.
    peak = max(raw_starts)
    cat_begin_value = [value / peak for value in raw_starts]
    small_cat, big_cat = get_big_small_index(cat_begin_value)

    normal_trend = numpy.random.uniform(-10, 10)
    special_list = get_special_list(category_num, max_num=1)

    data_content = []
    focus_coor = []
    compare_coor = []
    for cat, start in enumerate(cat_begin_value):
        if cat in special_list:
            middle_index = numpy.random.randint(1, ordinal_num - 1)
            end_value = start * (numpy.random.uniform(0.5, 1.8))
            if numpy.random.random() > 0.5:
                # Peak: interior point above both ends.
                middle_value = max(start, end_value) * numpy.random.uniform(
                    1.2, 2)
            else:
                # Dip: interior point below both ends.
                middle_value = min(start, end_value) * numpy.random.uniform(
                    0.3, 0.8)
            anchors = {
                0: start,
                middle_index: middle_value,
                last_ord: end_value
            }
            focus_coor.append([cat, 0])
            focus_coor.append([cat, middle_index])
            focus_coor.append([cat, last_ord])
        else:
            compare_coor.append([cat, 0])
            compare_coor.append([cat, last_ord])
            anchors = {0: start, last_ord: start * (1 + normal_trend / 10.0)}
        data_content.append(anchors)

    sentences = [{
        'focus_id': [
            change_coordinate_to_id(coor, ordinal_num) for coor in focus_coor
        ],
        'compare_id': [
            change_coordinate_to_id(coor, ordinal_num)
            for coor in compare_coor
        ],
        'type': 'compare_trend'
    }]

    data_full = interpolate_data(data_content)

    if judge_big_diff(data_full, small_cat, big_cat) > 0.1:
        small_ids = []
        big_ids = []
        for cat in range(category_num):
            if cat in small_cat:
                bucket = small_ids
            elif cat in big_cat:
                bucket = big_ids
            else:
                continue
            for ordinal in range(ordinal_num):
                bucket.append(
                    change_coordinate_to_id([cat, ordinal], ordinal_num))
        sentences.append({
            'focus_id': small_ids,
            'compare_id': big_ids,
            'type': 'compare_ave'
        })

    data = generate_pack_data(data_full)
    data = add_color(data)
    data = add_small_value(data)
    data['vis_type'] = vis_type
    data['major_name'] = 'c0'
    data['second_name'] = 'o0'
    data['pre_gen_focus'] = sentences
    return data
def generate_all_special_stack_data(rule_type,
                                    vis_type='load_stack_bar_chart',
                                    ordinal_min=5,
                                    ordinal_max=10,
                                    category_min=2,
                                    category_max=6):
    """Generate a random stacked dataset with two trends and a scaled column.

    Categories returned by ``get_special_list(category_num)`` follow
    ``special_trend`` and the rest follow ``normal_trend``; the two trends
    are redrawn until they differ by more than 7 and each has magnitude
    above 0.5. After ``interpolate_data``, every category's value at one
    interior ordinal position is multiplied by a common factor derived from
    ``get_special_ratio()`` and the neighbouring column totals.

    Args:
        rule_type: Not read by this function.
        vis_type: Stored under ``data['vis_type']``.
        ordinal_min: Lower bound passed to ``randint`` for the ordinal count.
        ordinal_max: Upper bound passed to ``randint`` for the ordinal count.
        category_min: Lower bound passed to ``randint`` for the category count.
        category_max: Upper bound passed to ``randint`` for the category count.

    Returns:
        The result of ``generate_pack_data`` after ``add_color`` and
        ``add_small_value``, with ``'vis_type'``, ``'major_name'`` (``'o0'``),
        ``'second_name'`` (``'c0'``) and ``'pre_gen_focus'`` set.
        ``'pre_gen_focus'`` holds an optional ``'compare_trend'`` sentence
        followed by ``'local_sum_trend'``, ``'local_trend'`` and
        ``'sum_trend'`` sentences.
    """

    def column_totals():
        # Sum over categories for every ordinal position.
        return [
            sum([data_full[cat][ordinal] for cat in range(category_num)])
            for ordinal in range(ordinal_num)
        ]

    # Redraw the grid size until it has fewer than 30 cells.
    ordinal_num = numpy.random.randint(ordinal_min, ordinal_max)
    category_num = numpy.random.randint(category_min, category_max)
    while ordinal_num * category_num >= 30:
        ordinal_num = numpy.random.randint(ordinal_min, ordinal_max)
        category_num = numpy.random.randint(category_min, category_max)
    last_ord = ordinal_num - 1

    # Unused value; the draw is kept so the random sequence stays the same.
    begin_num = numpy.random.random()

    raw_starts = [numpy.random.uniform(0.1, 1) for _ in range(category_num)]
    # Normalize so that the largest start value is 1.
    peak = max(raw_starts)
    cat_begin_value = [value / peak for value in raw_starts]
    small_cat, big_cat = get_big_small_index(cat_begin_value)

    # Draw the two trends until they are clearly distinct and non-flat.
    trends_ok = False
    while not trends_ok:
        normal_trend = numpy.random.uniform(-10, 10)
        special_trend = numpy.random.uniform(-10, 10)
        trends_ok = (abs(normal_trend - special_trend) > 7
                     and abs(normal_trend) > 0.5
                     and abs(special_trend) > 0.5)

    special_list = get_special_list(category_num)

    data_content = []
    focus_coor = []
    compare_coor = []
    for cat, start in enumerate(cat_begin_value):
        if cat in special_list:
            trend, bucket = special_trend, focus_coor
        else:
            trend, bucket = normal_trend, compare_coor
        bucket.append([cat, 0])
        bucket.append([cat, last_ord])
        data_content.append({0: start, last_ord: start * (1 + trend / 10.0)})

    sentences = []
    if 2 * len(special_list) < category_num or category_num == 2:
        sentences.append({
            'focus_id': [
                change_coordinate_to_id(coor, ordinal_num)
                for coor in focus_coor
            ],
            'compare_id': [
                change_coordinate_to_id(coor, ordinal_num)
                for coor in compare_coor
            ],
            'type': 'compare_trend'
        })

    data_full = interpolate_data(data_content)
    # Result unused; the call is kept exactly as in the original flow.
    diff_max = judge_big_diff(data_full, small_cat, big_cat)

    sum_value = column_totals()
    special_ord = numpy.random.randint(1, ordinal_num - 1)
    special_ratio = get_special_ratio()
    # Turn the ratio into a per-cell factor relative to the current total.
    if special_ratio > 1:
        reference = max(sum_value[special_ord - 1],
                        sum_value[special_ord + 1])
    else:
        reference = min(sum_value[special_ord - 1],
                        sum_value[special_ord + 1])
    special_ratio = special_ratio * reference / sum_value[special_ord]

    # Second differences of the totals at the chosen point and at its
    # neighbours; a neighbour on the series edge counts as 0.
    bend_here = abs(sum_value[special_ord + 1] + sum_value[special_ord - 1] -
                    2 * sum_value[special_ord])
    bend_prev = 0 if special_ord == 1 else abs(
        sum_value[special_ord] + sum_value[special_ord - 2] -
        2 * sum_value[special_ord - 1])
    bend_next = 0 if special_ord == ordinal_num - 2 else abs(
        sum_value[special_ord] + sum_value[special_ord + 2] -
        2 * sum_value[special_ord + 1])
    if bend_prev > bend_next and bend_prev > bend_here:
        special_ord -= 1
    elif bend_next > bend_prev and bend_next > bend_here:
        special_ord += 1

    focus_id = []
    compare_id = []
    for cat in range(category_num):
        data_full[cat][special_ord] = data_full[cat][special_ord] * special_ratio
        for ordinal in (special_ord - 1, special_ord, special_ord + 1):
            focus_id.append(
                change_coordinate_to_id([cat, ordinal], ordinal_num))
    # Both sentences deliberately reference the same two list objects.
    for sentence_type in ('local_sum_trend', 'local_trend'):
        sentences.append({
            'focus_id': focus_id,
            'compare_id': compare_id,
            'type': sentence_type
        })

    # Totals are recomputed because one column has just been rescaled.
    sum_value = column_totals()
    index_special = extract_trend_special(sum_value)
    focus_id = [
        change_coordinate_to_id([cat, ordinal], ordinal_num)
        for cat in range(category_num)
        for ordinal in range(ordinal_num)
        if ordinal in index_special
    ]
    sentences.append({
        'focus_id': focus_id,
        'compare_id': [],
        'type': 'sum_trend'
    })

    data = generate_pack_data(data_full)
    data = add_color(data)
    data = add_small_value(data)
    data['vis_type'] = vis_type
    data['major_name'] = 'o0'
    data['second_name'] = 'c0'
    data['pre_gen_focus'] = sentences
    return data
def generate_single_complex_stack_data(rule_type,
                                       vis_type='load_stack_bar_chart',
                                       ordinal_min=5,
                                       ordinal_max=10,
                                       category_min=2,
                                       category_max=6):
    """Generate a random stacked dataset where one category may bend mid-series.

    A category returned by ``get_special_list(category_num, max_num=1)``
    gets a three-anchor series (start, random interior index, end) when its
    start exceeds 20% of the summed starts or when it is category 0.
    Otherwise it gets a begin/end pair from ``get_begin_end_value`` using
    ``special_trend``. All remaining categories use ``get_begin_end_value``
    with ``normal_trend``. ``interpolate_data`` fills in the rest.

    Args:
        rule_type: Not read by this function.
        vis_type: Stored under ``data['vis_type']``.
        ordinal_min: Lower bound passed to ``randint`` for the ordinal count.
        ordinal_max: Upper bound passed to ``randint`` for the ordinal count.
        category_min: Lower bound passed to ``randint`` for the category count.
        category_max: Upper bound passed to ``randint`` for the category count.

    Returns:
        The result of ``generate_pack_data`` after ``add_color`` and
        ``add_small_value``, with ``'vis_type'``, ``'major_name'`` (``'o0'``),
        ``'second_name'`` (``'c0'``) and ``'pre_gen_focus'`` set.
        ``'pre_gen_focus'`` holds optional ``'all_trend'`` and
        ``'compare_trend'`` sentences followed by a ``'sum_trend'`` sentence.
    """
    # Redraw the grid size until it has fewer than 30 cells.
    ordinal_num = numpy.random.randint(ordinal_min, ordinal_max)
    category_num = numpy.random.randint(category_min, category_max)
    while ordinal_num * category_num >= 30:
        ordinal_num = numpy.random.randint(ordinal_min, ordinal_max)
        category_num = numpy.random.randint(category_min, category_max)
    last_ord = ordinal_num - 1

    # Unused value; the draw is kept so the random sequence stays the same.
    begin_num = numpy.random.random()

    raw_starts = [numpy.random.uniform(0.1, 1) for _ in range(category_num)]
    # Normalize so that the largest start value is 1.
    peak = max(raw_starts)
    cat_begin_value = [value / peak for value in raw_starts]
    small_cat, big_cat = get_big_small_index(cat_begin_value)

    # Draw the two trends until they are clearly distinct and non-flat.
    trends_ok = False
    while not trends_ok:
        normal_trend = numpy.random.uniform(-10, 10)
        special_trend = numpy.random.uniform(-10, 10)
        trends_ok = (abs(normal_trend - special_trend) > 7
                     and abs(normal_trend) > 0.5
                     and abs(special_trend) > 0.5)

    special_list = get_special_list(category_num, max_num=1)

    data_content = []
    focus_coor = []
    compare_coor = []
    sentences = []
    need_complex_trend = False
    for cat, start in enumerate(cat_begin_value):
        if cat not in special_list:
            compare_coor.append([cat, 0])
            compare_coor.append([cat, last_ord])
            first, final = get_begin_end_value(start, normal_trend)
            anchors = {0: first, last_ord: final}
        elif start > 0.2 * sum(cat_begin_value) or cat == 0:
            need_complex_trend = True
            middle_index = numpy.random.randint(1, ordinal_num - 1)
            end_value = start * (numpy.random.uniform(0.5, 1.8))
            if numpy.random.random() > 0.5:
                # Peak: interior point above both ends.
                middle_value = max(start, end_value) * numpy.random.uniform(
                    1.2, 2)
            else:
                # Dip: interior point below both ends.
                middle_value = min(start, end_value) * numpy.random.uniform(
                    0.3, 0.8)
            anchors = {
                0: start,
                middle_index: middle_value,
                last_ord: end_value
            }
            focus_coor.append([cat, 0])
            focus_coor.append([cat, middle_index])
            focus_coor.append([cat, last_ord])
        else:
            focus_coor.append([cat, 0])
            focus_coor.append([cat, last_ord])
            first, final = get_begin_end_value(start, special_trend)
            anchors = {0: first, last_ord: final}
        data_content.append(anchors)

    if need_complex_trend:
        sentences.append({
            'focus_id': [
                change_coordinate_to_id(coor, ordinal_num)
                for coor in focus_coor
            ],
            'compare_id': [],
            'type': 'all_trend'
        })
    if 2 * len(special_list) < category_num:
        sentences.append({
            'focus_id': [
                change_coordinate_to_id(coor, ordinal_num)
                for coor in focus_coor
            ],
            'compare_id': [
                change_coordinate_to_id(coor, ordinal_num)
                for coor in compare_coor
            ],
            'type': 'compare_trend'
        })

    data_full = interpolate_data(data_content)
    # Result unused; the call is kept exactly as in the original flow.
    diff_max = judge_big_diff(data_full, small_cat, big_cat)

    # Total over categories for every ordinal position.
    sum_value = [
        sum([data_full[cat][ordinal] for cat in range(category_num)])
        for ordinal in range(ordinal_num)
    ]
    index_special = extract_trend_special(sum_value)
    sentences.append({
        'focus_id': [
            change_coordinate_to_id([cat, ordinal], ordinal_num)
            for cat in range(category_num)
            for ordinal in range(ordinal_num)
            if ordinal in index_special
        ],
        'compare_id': [],
        'type': 'sum_trend'
    })

    # NOTE(review): the per-category share below is computed but never used.
    for cat in range(category_num):
        average_percent = sum([
            data_full[cat][ordinal] / sum_value[ordinal]
            for ordinal in range(ordinal_num)
        ])

    data = generate_pack_data(data_full)
    data = add_color(data)
    data = add_small_value(data)
    data['vis_type'] = vis_type
    data['major_name'] = 'o0'
    data['second_name'] = 'c0'
    data['pre_gen_focus'] = sentences
    return data
def generate_ocq_group_data(rule_type,
                            vis_type='load_group_bar_chart',
                            ordinal_min=3,
                            ordinal_max=10,
                            category_min=2,
                            category_max=6,
                            main_dim='c0',
                            second_dim='q0'):
    """Generate a random grouped dataset with two trends and focus sentences.

    Categories in the special list follow ``special_trend`` and the rest
    follow ``normal_trend``. If the special trend is the smaller of the two,
    the trends are swapped and the special list is replaced by its
    complement, so the special list always carries the larger trend. When
    ``main_dim == 'c0'`` and there are at least five ordinal positions, one
    random interior cell is additionally overwritten with a scaled
    neighbour value.

    Args:
        rule_type: Not read by this function.
        vis_type: Stored under ``data['vis_type']``.
        ordinal_min: Lower bound passed to ``randint`` for the ordinal count.
        ordinal_max: Upper bound passed to ``randint`` for the ordinal count.
        category_min: Lower bound passed to ``randint`` for the category count.
        category_max: Upper bound passed to ``randint`` for the category count.
        main_dim: Stored under ``data['major_name']``; also gates the
            extra special value.
        second_dim: Stored under ``data['second_name']``.

    Returns:
        The result of ``generate_pack_data`` after ``add_color``, with
        ``'major_name'``, ``'second_name'``, ``'vis_type'`` and
        ``'pre_gen_focus'`` set. ``'pre_gen_focus'`` holds a
        ``'compare_trend'`` sentence and optional ``'compare_ave'`` and
        ``'local_trend'`` sentences.
    """
    # Redraw the grid size until it has fewer than 30 cells.
    ordinal_num = numpy.random.randint(ordinal_min, ordinal_max)
    category_num = numpy.random.randint(category_min, category_max)
    while ordinal_num * category_num >= 30:
        ordinal_num = numpy.random.randint(ordinal_min, ordinal_max)
        category_num = numpy.random.randint(category_min, category_max)
    last_ord = ordinal_num - 1

    # Unused value; the draw is kept so the random sequence stays the same.
    begin_num = numpy.random.random()

    raw_starts = [numpy.random.uniform(0.1, 1) for _ in range(category_num)]
    # Normalize so that the largest start value is 1.
    peak = max(raw_starts)
    cat_begin_value = [value / peak for value in raw_starts]
    small_cat, big_cat = get_big_small_index(cat_begin_value)

    # Draw the two trends until they are clearly distinct and non-flat.
    trends_ok = False
    while not trends_ok:
        normal_trend = numpy.random.uniform(-10, 10)
        special_trend = numpy.random.uniform(-10, 10)
        trends_ok = (abs(normal_trend - special_trend) > 7
                     and abs(normal_trend) > 0.5
                     and abs(special_trend) > 0.5)

    special_list = get_special_list(category_num)
    if special_trend < normal_trend:
        # Swap roles so the special list carries the larger trend.
        normal_trend, special_trend = special_trend, normal_trend
        special_list = [
            cat for cat in range(category_num) if cat not in special_list
        ]

    data_content = []
    focus_coor = []
    compare_coor = []
    for cat, start in enumerate(cat_begin_value):
        if cat in special_list:
            trend, bucket = special_trend, focus_coor
        else:
            trend, bucket = normal_trend, compare_coor
        bucket.append([cat, 0])
        bucket.append([cat, last_ord])
        data_content.append({0: start, last_ord: start * (1 + trend / 10.0)})

    sentences = [{
        'focus_id': [
            change_coordinate_to_id(coor, ordinal_num) for coor in focus_coor
        ],
        'compare_id': [
            change_coordinate_to_id(coor, ordinal_num)
            for coor in compare_coor
        ],
        'type': 'compare_trend'
    }]

    data_full = interpolate_data(data_content)

    if judge_big_diff(data_full, small_cat, big_cat) > 0.1:
        small_ids = []
        big_ids = []
        for cat in range(category_num):
            if cat in small_cat:
                bucket = small_ids
            elif cat in big_cat:
                bucket = big_ids
            else:
                continue
            for ordinal in range(ordinal_num):
                bucket.append(
                    change_coordinate_to_id([cat, ordinal], ordinal_num))
        sentences.append({
            'focus_id': small_ids,
            'compare_id': big_ids,
            'type': 'compare_ave'
        })

    # Add one "magic" special value at a random interior position.
    if main_dim == 'c0' and ordinal_num >= 5:
        special_cat = numpy.random.randint(category_num)
        special_ord = numpy.random.randint(1, ordinal_num - 1)
        special_ratio = get_special_ratio()
        row = data_full[special_cat]
        pick = max if special_ratio > 1 else min
        row[special_ord] = pick(row[special_ord - 1],
                                row[special_ord + 1]) * special_ratio
        sentences.append({
            'focus_id': [
                change_coordinate_to_id([special_cat, ordinal], ordinal_num)
                for ordinal in (special_ord - 1, special_ord,
                                special_ord + 1)
            ],
            'compare_id': [],
            'type': 'local_trend'
        })

    data = generate_pack_data(data_full)
    # The dimension names are set before add_color runs, as in the original.
    data['major_name'] = main_dim
    data['second_name'] = second_dim
    data = add_color(data)
    data['vis_type'] = vis_type
    data['pre_gen_focus'] = sentences
    return data