def get_compare_value(data, focus_value, compare_value, focus_aggre_name,
                      compare_aggre_name):
    """Build 'compare' sentences relating the focus values to the compared ones.

    The relation and degree wording come from ``get_compare_relation``. One
    sentence naming both groups is always produced. When the two groups
    together account for every entry of ``data['data_array']`` and more than
    two values are compared against, an "others" variant is added and, for
    the relations 'higher than' / 'lower than', a superlative variant too.

    NOTE(review): the source this was restored from had lost its indentation;
    the superlative part is assumed to sit inside the "covers every entry"
    branch -- confirm against the original layout.

    Args:
        data: Chart description. ``data['data_array']`` is read here and the
            whole object is forwarded to ``get_compare_relation``.
        focus_value: Sized collection of the focused values.
        compare_value: Sized collection of the values compared against.
        focus_aggre_name: Display name of the focus group.
        compare_aggre_name: Display name of the compared group.

    Returns:
        List of the objects returned by ``get_sentence_setting``.
    """
    extreme_by_relation = {'higher than': 'highest', 'lower than': 'lowest'}

    relation, degree, _ratio = get_compare_relation(focus_value, compare_value,
                                                    data)
    sentence = f'The value in {focus_aggre_name} is {degree} {relation} {compare_aggre_name}'
    sentences = [get_sentence_setting('compare', sentence)]

    covers_every_entry = (
        len(focus_value) + len(compare_value) == len(data['data_array']))
    if covers_every_entry and len(compare_value) > 2:
        sentence = f'The value in {focus_aggre_name} is {degree} {relation} others'
        sentences.append(get_sentence_setting('compare', sentence))

        # A lookup instead of if/elif: previously `extreme` stayed unbound for
        # any relation other than the two handled ones and the f-string below
        # crashed. Unknown relations (and 'same as') now simply produce no
        # superlative sentence.
        extreme = extreme_by_relation.get(relation)
        if extreme is not None:
            # The single-value and multi-value cases used identical wording,
            # so one statement covers both.
            sentence = f'{focus_aggre_name} have the {extreme} {len(focus_value)} value'
            sentences.append(get_sentence_setting('compare', sentence))
    return sentences
def compare_diff(data,
                 focus_data,
                 compare_data,
                 focus_name,
                 compare_name,
                 share_name,
                 main_dimension,
                 version='English'):
    """Build 'compare_diff' sentences about the spread inside two groups.

    ``cal_compare_diff`` is applied to both inputs and the results are
    compared with ``compare``, which yields a relation, a degree word, a
    scope marker and a ratio.

    Args:
        data: Chart description; ``data[main_dimension]`` is read for its
            length.
        focus_data: Input handed to ``cal_compare_diff`` for the focus group.
        compare_data: Input handed to ``cal_compare_diff`` for the other group.
        focus_name: Display name of the focus group.
        compare_name: Display name of the compared group.
        share_name: Display name of what both groups have in common.
        main_dimension: Key into ``data`` of the dimension being compared.
        version: Unused in this function.

    Returns:
        List of the objects returned by ``get_sentence_setting``.
    """
    focus_array = cal_compare_diff(focus_data)
    compare_array = cal_compare_diff(compare_data)
    # `scope` rather than `range`, to avoid shadowing the builtin.
    relation, degree, scope, ratio = compare(focus_array, compare_array)

    sentences = []
    group_count = len(focus_array) + len(compare_array)
    # The extreme statement is only made when the comparison holds for "all"
    # and the two groups together span the whole main dimension.
    if (scope == "all" and group_count > 2
            and len(data[main_dimension]) == group_count):
        extreme = "minimum" if ratio < 1 else "maximum"
        sentence = f"The difference among {share_name} reached its {extreme} in the category of {focus_name}"
        sentences.append(get_sentence_setting('compare_diff', sentence))

    if relation in ('higher', 'lower'):
        relation = relation + " than"
    else:
        degree = "almost"
        relation = "the same as"

    # `relation` already ends in "than"/"as"; the template used to add a
    # second "than", producing "higher than than" / "the same as than".
    sentence = f"The difference among {share_name} in {focus_name} is {degree} {relation} {compare_name}"
    sentences.append(get_sentence_setting('compare_diff', sentence))
    return sentences
def cal_ccq_count_sentence(data, focus_id, focus_main_dim, focus_second_dim,
                           focus_main_name, focus_second_name):
    """Describe the value range of the records selected by ``focus_id``.

    Reads ``q0`` from ``data['data_array'][i]`` for every ``i`` in
    ``focus_id`` and emits four sentences: two 'absolute_range', one
    'absolute_higher' and one 'absolute_lower'. The wording of the last three
    depends on whether ``sum(focus_main_dim) == len(focus_main_dim)``
    (presumably "every entry of a 0/1 mask is set" -- verify against caller).

    Args:
        data: Chart description; ``data['data_array']`` and ``data['unit']``
            are read.
        focus_id: Indices into ``data['data_array']``; must not be empty
            because ``min``/``max`` are taken over the selected values.
        focus_main_dim: Summable, sized collection used for the wording test.
        focus_second_dim: Unused in this function.
        focus_main_name: Display name for the main dimension selection.
        focus_second_name: Display name for the second dimension selection.

    Returns:
        List of the objects returned by ``get_sentence_setting``.
    """
    records = data['data_array']
    quantities = [records[idx]['q0'] for idx in focus_id]
    value_max = max(quantities)
    value_min = min(quantities)
    unit = data['unit']

    sentences = [
        get_sentence_setting(
            'absolute_range',
            f"The value of {focus_second_name} in {focus_main_name} ranges from {value_min} to {value_max} {unit}",
        )
    ]

    whole_main_selected = sum(focus_main_dim) == len(focus_main_dim)
    if whole_main_selected:
        range_text = f"The value of {focus_second_name} ranges from {value_min} to {value_max} {unit}"
        higher_text = f"All values of {focus_second_name} is higher than {value_min} {unit}"
        lower_text = f"All values of {focus_second_name} is lower than {value_max} {unit}"
    else:
        range_text = f"The value of {focus_main_name} in {focus_second_name} ranges from {value_min} to {value_max} {unit}"
        higher_text = f"All values of {focus_main_name} in {focus_second_name} is higher than {value_min} {unit}"
        lower_text = f"All values of {focus_main_name} in {focus_second_name} is lower than {value_max} {unit}"

    for kind, text in (('absolute_range', range_text),
                       ('absolute_higher', higher_text),
                       ('absolute_lower', lower_text)):
        sentences.append(get_sentence_setting(kind, text))
    return sentences
def sentence_local_trend(data,
                         focus_id,
                         compare_id,
                         major_name='c0',
                         second_name='o0',
                         version='English',
                         fuzzy=True,
                         ordinal_name='o0',
                         category_name='c0'):
    """Describe a local peak or dip at the middle of three chosen positions.

    ``modify_local_trend`` picks the categories and ordinal positions;
    ``get_sum_quantity_array`` supplies one summed quantity per ordinal
    position. If the quantity at the second chosen position is strictly
    above both neighbours a "rise" pair of sentences is produced, if it is
    strictly below both a "drop" pair; otherwise nothing.

    Args:
        data: Chart description; ``data[major_name]``, ``data[second_name]``,
            ``data['title']`` and ``data['unit']`` are read.
        focus_id: Passed to ``modify_local_trend`` and replaced by its result.
        compare_id: Passed to ``modify_local_trend`` and replaced by its result.
        major_name: Key of the dimension the category names are taken from.
        second_name: Key of the dimension the position names are taken from.
        version, fuzzy, ordinal_name, category_name: Unused in this function.

    Returns:
        List of the objects returned by ``get_sentence_setting``; empty when
        ``data[second_name]`` has fewer than 4 entries, fewer than 3 ordinal
        positions were chosen, or there is neither a peak nor a dip.
    """
    if len(data[second_name]) < 4:
        return []
    focus_id, compare_id, category_chosen, ordinal_chosen = modify_local_trend(
        data, focus_id, compare_id)
    if len(ordinal_chosen) < 3:
        return []

    # The debug print of `category_chosen` and the unused `max_value`
    # computation that used to live here were removed.
    ordinal_sum_quantity = get_sum_quantity_array(data, category_chosen)
    before = ordinal_sum_quantity[ordinal_chosen[0]]
    middle = ordinal_sum_quantity[ordinal_chosen[1]]
    after = ordinal_sum_quantity[ordinal_chosen[2]]

    is_peak = middle > before and middle > after
    is_dip = middle < before and middle < after
    if not (is_peak or is_dip):
        return []

    # The total is taken from the same dimension the names come from. It was
    # hard-coded to data['c0'], which is identical for the default
    # major_name but inconsistent for any other value.
    category_label = get_aggre_name(
        [data[major_name][i] for i in category_chosen], len(data[major_name]))
    ordinal_label = get_aggre_name([data[second_name][ordinal_chosen[1]]], 10)

    sentences = []
    if is_peak:
        # With a single category the absolute value can be stated directly.
        if len(category_chosen) == 1:
            sentence = f' {data["title"]} of {category_label} suddenly increase to {middle} {data["unit"]} in {ordinal_label} '
        else:
            sentence = f' there is an unusual rise in {ordinal_label} of {category_label}'
        sentences.append(
            get_sentence_setting('local_trend', sentence, focus_id, compare_id))
        sentence = f' the sum value of {category_label} has an unusual rise in {ordinal_label} '
        sentences.append(
            get_sentence_setting('local_sum_trend', sentence, focus_id,
                                 compare_id))
    else:
        sentence = f'There is an unusual drop in {ordinal_label} of {category_label}'
        sentences.append(
            get_sentence_setting('local_trend', sentence, focus_id, compare_id))
        sentence = f' the sum value of {category_label} has an unusual drop in {ordinal_label} '
        sentences.append(
            get_sentence_setting('local_sum_trend', sentence, focus_id,
                                 compare_id))
    return sentences
def cal_ccq_single_sentence(data, focus_id, focus_main_name,
                            focus_second_name):
    """State the ``q0`` value of the first record selected by ``focus_id``.

    Args:
        data: Chart description; ``data['data_array']`` is indexed with
            ``focus_id[0]``.
        focus_id: Indexable collection; only its first element is used.
        focus_main_name: Display name for the main dimension selection.
        focus_second_name: Display name for the second dimension selection.

    Returns:
        One-element list holding the 'absolute_single' object returned by
        ``get_sentence_setting``.
    """
    record = data['data_array'][focus_id[0]]
    value = record['q0']
    return [
        get_sentence_setting(
            'absolute_single',
            f'The value of {focus_main_name} in {focus_second_name} is {value}',
        )
    ]
def sentence_compare_trend(data,
                           focus_id,
                           compare_id,
                           major_name='c0',
                           second_name='o0',
                           version='English',
                           fuzzy=True,
                           quantity_name='q0',
                           category_name='c0'):
    """Build one 'compare_trend' sentence contrasting two groups' trends.

    ``modify_compare_trend`` selects categories and ordinal positions for
    both sides. Each side is summarised with ``get_ave_parameter`` and turned
    into text with ``get_sentence``; the two texts are joined with
    "; while".

    Args:
        data: Chart description; ``data['data_array']``,
            ``data[category_name]`` and ``data['title']`` are read.
        focus_id: Passed to ``modify_compare_trend``.
        compare_id: Passed to ``modify_compare_trend``.
        quantity_name: Key of the quantity inside each ``data_array`` entry.
        category_name: Key of the category dimension inside ``data``.
        major_name, second_name, version, fuzzy: Unused in this function.

    Returns:
        One-element list with the object returned by
        ``get_sentence_setting``, or an empty list when either side has no
        category or fewer than two ordinal positions.
    """
    # The debug print that dumped the whole `data` object on every call was
    # removed from this spot.
    (focus_id, compare_id, focus_category_chosen, focus_ordinal_chosen,
     compare_category_chosen, compare_ordinal_chosen, new_focus_related_id,
     new_compare_related_id) = modify_compare_trend(data, focus_id,
                                                    compare_id)

    if (len(focus_category_chosen) == 0 or len(compare_category_chosen) == 0
            or len(focus_ordinal_chosen) < 2
            or len(compare_ordinal_chosen) < 2):
        return []

    max_value = max(datum[quantity_name] for datum in data['data_array'])
    category_total = len(data[category_name])
    focus_name = get_aggre_name(
        [data[category_name][i] for i in focus_category_chosen],
        category_total)
    compare_name = get_aggre_name(
        [data[category_name][i] for i in compare_category_chosen],
        category_total)

    focus_segment_array, focus_parameter_array_ave, focus_quantity = get_ave_parameter(
        data, focus_category_chosen, focus_ordinal_chosen, max_value)
    compare_segment_array, compare_parameter_array_ave, compare_quantity = get_ave_parameter(
        data, compare_category_chosen, compare_ordinal_chosen, max_value)

    # Absolute values are only allowed when a side is a single category.
    focus_sentence = get_sentence(
        data,
        focus_segment_array,
        focus_parameter_array_ave,
        focus_quantity,
        object_name='',
        allow_absolute_value=len(focus_category_chosen) <= 1)
    focus_sentence = fix_sentence_end(focus_sentence)

    compare_sentence = get_sentence(
        data,
        compare_segment_array,
        compare_parameter_array_ave,
        compare_quantity,
        object_name='',
        allow_absolute_value=len(compare_category_chosen) <= 1,
        is_compare=True)

    sentence = f'{data["title"]} of {focus_name} {focus_sentence}; while {compare_name} {compare_sentence}'
    return [
        get_sentence_setting('compare_trend', sentence, new_focus_related_id,
                             new_compare_related_id)
    ]
def sentence_sum_trend(data,
                       focus_id,
                       compare_id,
                       major_name='c0',
                       second_name='q0',
                       version='English',
                       fuzzy=True,
                       ordinal_name='o0',
                       category_name='c0'):
    """Describe the trend of the summed quantity of the chosen categories.

    ``modify_sum_trend`` selects categories and ordinal positions,
    ``get_sum_quantity_array`` gives the per-position sums and
    ``get_segment_parameter`` segments them. A 'sum_trend' sentence is always
    produced; an additional 'all_trend' sentence (flagged ``sure=False``) is
    produced when more than one category was chosen.

    Args:
        data: Chart description; ``data[category_name]`` and
            ``data['title']`` are read.
        focus_id: Passed to ``modify_sum_trend`` and replaced by its result.
        compare_id: Passed to ``modify_sum_trend`` and replaced by its result.
        category_name: Key of the category dimension inside ``data``.
        major_name, second_name, version, fuzzy, ordinal_name: Unused in
            this function.

    Returns:
        List of the objects returned by ``get_sentence_setting``; empty when
        fewer than two ordinal positions were chosen.
    """
    (focus_id, compare_id, category_chosen, ordinal_chosen,
     new_focus_related_id) = modify_sum_trend(data, focus_id, compare_id)
    if len(ordinal_chosen) < 2:
        return []

    ordinal_sum_quantity = get_sum_quantity_array(data, category_chosen)
    peak = max(ordinal_sum_quantity)
    segment_array, segment_parameter_array = get_segment_parameter(
        ordinal_chosen, ordinal_sum_quantity, peak)

    def chosen_label():
        # Aggregated display name of the chosen categories.
        return get_aggre_name([data[category_name][i] for i in category_chosen],
                              len(data[category_name]))

    # A single category cannot meaningfully be called a "sum".
    single_category = len(category_chosen) == 1
    if single_category:
        object_name = f'{data["title"]} of {chosen_label()}'
    else:
        object_name = f'The sum of {data["title"]} {chosen_label()}'

    sentences = [
        get_sentence_setting(
            'sum_trend',
            get_sentence(data,
                         segment_array,
                         segment_parameter_array,
                         ordinal_sum_quantity,
                         object_name=object_name),
            focus_id,
            compare_id,
        )
    ]

    if len(category_chosen) > 1:
        object_name = f'The value of {chosen_label()}'
        overall = get_sentence(data,
                               segment_array,
                               segment_parameter_array,
                               ordinal_sum_quantity,
                               object_name=object_name,
                               allow_absolute_value=False)
        sentences.append(
            get_sentence_setting('all_trend',
                                 overall,
                                 new_focus_related_id,
                                 compare_id,
                                 sure=False))
    return sentences
def compare_1d(data,
               focus_data,
               compare_data,
               focus_name,
               compare_name,
               share_name,
               main_dimension,
               version='English'):
    """Build 'compare' / 'compare_ave' sentences for a one-dimensional comparison.

    ``cal_compare_1d`` is applied to both inputs and the results are compared
    with ``compare``, which yields a relation, a degree word, a scope marker
    and a ratio.

    Args:
        data: Chart description; ``data['unit']``, ``data['title']`` and
            ``data[main_dimension]`` are read.
        focus_data: Input handed to ``cal_compare_1d`` for the focus group.
        compare_data: Input handed to ``cal_compare_1d`` for the other group.
        focus_name: Display name of the focus group.
        compare_name: Display name of the compared group.
        share_name: Display name of what both groups have in common.
        main_dimension: Key into ``data`` of the dimension being compared.
        version: Unused in this function.

    Returns:
        List of the objects returned by ``get_sentence_setting``.
    """
    sentences = []
    focus_array = cal_compare_1d(focus_data)
    compare_array = cal_compare_1d(compare_data)
    # `scope` rather than `range`, to avoid shadowing the builtin.
    relation, degree, scope, ratio = compare(focus_array, compare_array)
    unit = data['unit']

    if relation in ('higher', 'lower'):
        relation = relation + " than"
    else:
        degree = "almost"
        relation = "the same as"

    if len(focus_array) == 1:
        sentence = f'In the category of {share_name}, {data["title"]} of {focus_name} is {focus_array[0]} {unit}, which is {degree} {relation} {compare_name}'
        # The same text is registered under two sentence types.
        sentences.append(get_sentence_setting('compare', sentence))
        sentences.append(get_sentence_setting('compare_ave', sentence))

    group_count = len(focus_array) + len(compare_array)
    # A superlative is only stated when the comparison holds for "all" and
    # both groups together span the whole main dimension.
    if (scope == "all" and group_count > 2
            and len(data[main_dimension]) == group_count):
        extreme = "lowest" if ratio < 1 else "highest"
        if len(focus_array) > 1:
            sentence = f"{focus_name} has the {extreme} {len(focus_array)} value in {share_name}"
        else:
            # Spelling fixed: this sentence used to read "categery".
            sentence = f"{focus_name} is the {extreme} value in the category of {share_name}"
        sentences.append(get_sentence_setting('compare', sentence))

    sentence = "the value in {} is {} {} {} in the category of {}".format(
        focus_name, degree, relation, compare_name, share_name)
    sentences.append(get_sentence_setting('compare', sentence))
    return sentences
def get_count(data, focus_value, compare_value, focus_aggre_name,
              compare_aggre_name):
    """Build 'count' sentences when the focus values touch a global extreme.

    If the largest focus value equals the largest ``q0`` in
    ``data['data_array']``, a sentence counting the values above the smallest
    focus value is produced; symmetrically for the smallest value.

    Args:
        data: Chart description; ``q0`` is read from every entry of
            ``data['data_array']``.
        focus_value: Non-empty, sized collection of the focused values.
        compare_value, focus_aggre_name, compare_aggre_name: Unused in this
            function.

    Returns:
        List (possibly empty) of the objects returned by
        ``get_sentence_setting``.
    """
    all_values = [datum['q0'] for datum in data['data_array']]
    total_max = max(all_values)
    total_min = min(all_values)
    focus_min = min(focus_value)
    focus_max = max(focus_value)
    count = len(focus_value)

    sentences = []
    # Wording fixed: the text used to read "categeries that higher than".
    if focus_max == total_max:
        sentence = f'There are {count} categories that are higher than {focus_min}'
        sentences.append(get_sentence_setting('count', sentence))
    if focus_min == total_min:
        sentence = f'There are {count} categories that are lower than {focus_max}'
        sentences.append(get_sentence_setting('count', sentence))
    return sentences
def cal_ccq_sum_sentence(data, focus_id, focus_main_dim, focus_second_dim,
                         focus_main_name, focus_second_name):
    """State the sum of ``q0`` over the records selected by ``focus_id``.

    Args:
        data: Chart description; ``data['data_array']`` and ``data['unit']``
            are read.
        focus_id: Iterable of indices into ``data['data_array']``.
        focus_main_dim: Unused in this function.
        focus_second_dim: Unused in this function.
        focus_main_name: Display name for the main dimension selection.
        focus_second_name: Display name for the second dimension selection.

    Returns:
        One-element list holding the 'absolute_sum' object returned by
        ``get_sentence_setting``.
    """
    records = data['data_array']
    # Uses the builtin instead of a hand-written loop over a local that was
    # itself named `sum` and shadowed it.
    total = sum(records[i]['q0'] for i in focus_id)
    unit = data['unit']
    sentence = f'The sum value of {focus_main_name} in {focus_second_name} is {total} {unit}'
    return [get_sentence_setting('absolute_sum', sentence)]
def cal_ccq_average_sentence(data, focus_id, focus_main_dim, focus_second_dim,
                             focus_main_name, focus_second_name):
    """State the mean of ``q0`` over the records selected by ``focus_id``.

    Args:
        data: Chart description; ``data['data_array']`` and ``data['unit']``
            are read.
        focus_id: Sized iterable of indices into ``data['data_array']``.
        focus_main_dim: Unused in this function.
        focus_second_dim: Unused in this function.
        focus_main_name: Display name for the main dimension selection.
        focus_second_name: Display name for the second dimension selection.

    Returns:
        One-element list holding the 'absolute_average' object returned by
        ``get_sentence_setting``, or an empty list when ``focus_id`` is empty
        (there is no average to report).
    """
    # An empty selection used to raise ZeroDivisionError in the division
    # below; with nothing selected there is simply no sentence.
    if len(focus_id) == 0:
        return []

    records = data['data_array']
    # `total` rather than `sum`, so the builtin is not shadowed.
    total = sum(records[i]['q0'] for i in focus_id)
    average = total / len(focus_id)
    unit = data['unit']
    sentence = f'The average value of {focus_main_name} in {focus_second_name} is {average} {unit}'
    return [get_sentence_setting('absolute_average', sentence)]
def get_range(data, focus_value, compare_value, focus_aggre_name,
              compare_aggre_name):
    """Describe the focused values as a single value or as a range.

    With exactly one focus value an 'absolute_single' sentence is returned.
    Otherwise 'absolute_higher', 'absolute_lower' and 'absolute_range'
    sentences are produced; when the focus covers as many values as there
    are entries in ``data['data_array']`` the group is called
    "all categories".

    NOTE(review): the source this was restored from had lost its indentation;
    only the renaming to "all categories" is assumed to be conditional, the
    three sentences are assumed to be emitted for every multi-value focus --
    confirm against the original layout.

    Args:
        data: Chart description; only the length of ``data['data_array']``
            is used.
        focus_value: Non-empty, sized, indexable collection of values.
        compare_value: Unused in this function.
        focus_aggre_name: Display name of the focus group.
        compare_aggre_name: Unused in this function.

    Returns:
        List of the objects returned by ``get_sentence_setting``.
    """
    # The global min/max of data['data_array'] and a value count used to be
    # computed here but were never read; that dead work was dropped.
    lowest = min(focus_value)
    highest = max(focus_value)

    if len(focus_value) == 1:
        sentence = f'The value of {focus_aggre_name} is {focus_value[0]}'
        return [get_sentence_setting('absolute_single', sentence)]

    if len(focus_value) == len(data['data_array']):
        focus_aggre_name = 'all categories'

    sentences = []
    sentence = f'The value of {focus_aggre_name} are higher than {lowest}'
    sentences.append(get_sentence_setting('absolute_higher', sentence))
    sentence = f'The value of {focus_aggre_name} are lower than {highest}'
    sentences.append(get_sentence_setting('absolute_lower', sentence))
    sentence = f'The value of {focus_aggre_name} ranges from {lowest} to {highest}'
    sentences.append(get_sentence_setting('absolute_range', sentence))
    return sentences
def compare_max(data,
                focus_data,
                compare_data,
                focus_name,
                compare_name,
                share_name,
                main_dimension,
                version='English'):
    """Contrast which shared item is the maximum in each of two groups.

    ``cal_compare_max`` is called on both inputs; each call returns a pair
    whose first element is de-duplicated here and whose second element (from
    the ``compare_data`` call) is used as the list of all shared names. A
    'compare_max' sentence is produced only when each side has exactly one
    distinct maximum and the two differ.

    NOTE(review): ``len(share_name)`` is passed as the total to
    ``get_aggre_name``; if ``share_name`` is a string that is its character
    count -- confirm this is intended. The literal ``100`` used for the focus
    "other" names is kept as found.

    Args:
        data: Unused in this function.
        focus_data: Input handed to ``cal_compare_max`` for the focus group.
        compare_data: Input handed to ``cal_compare_max`` for the other group.
        focus_name: Display name of the focus group.
        compare_name: Display name of the compared group.
        share_name: Sized object whose length is forwarded to
            ``get_aggre_name``.
        main_dimension: Unused in this function.
        version: Forwarded to ``get_aggre_name``.

    Returns:
        List with zero or one object returned by ``get_sentence_setting``.
    """
    focus_array, _ = cal_compare_max(focus_data)
    compare_array, share_name_array = cal_compare_max(compare_data)

    # Drop duplicates so that "exactly one maximum" can be tested by length.
    focus_array = list(set(focus_array))
    compare_array = list(set(compare_array))

    focus_other_array = [n for n in share_name_array if n not in focus_array]
    compare_other_array = [
        n for n in share_name_array if n not in compare_array
    ]

    share_total = len(share_name)
    focal_max_name = get_aggre_name(focus_array, share_total, version)
    focal_other_name = get_aggre_name(focus_other_array, 100, version)
    compare_max_name = get_aggre_name(compare_array, share_total, version)
    compare_other_name = get_aggre_name(compare_other_array, share_total,
                                        version)

    shares_a_maximum = any(item in compare_array for item in focus_array)
    one_each = len(focus_array) == 1 and len(compare_array) == 1
    if shares_a_maximum or not one_each:
        return []

    if len(compare_other_array) < 2:
        sentence = f"{focal_max_name} is higher than {focal_other_name} in {focus_name}, while {compare_max_name} is higher than {compare_other_name} in {compare_name}"
    else:
        sentence = f"{focal_max_name} is the maximum in {focus_name}, while {compare_max_name} is the maximum in {compare_name}"
    return [get_sentence_setting('compare_max', sentence)]
def compare_min(data,
                focus_data,
                compare_data,
                focus_name,
                compare_name,
                share_name,
                main_dimension,
                version='English'):
    """Contrast which shared item is the minimum in each of two groups.

    ``cal_compare_min`` is applied to both inputs and each result is
    de-duplicated. A 'compare_min' sentence is produced only when each side
    has exactly one distinct minimum and the two differ.

    Args:
        data: Unused in this function.
        focus_data: Input handed to ``cal_compare_min`` for the focus group.
        compare_data: Input handed to ``cal_compare_min`` for the other group.
        focus_name: Display name of the focus group.
        compare_name: Display name of the compared group.
        share_name: Sized object whose length is forwarded to
            ``get_aggre_name``.
        main_dimension: Unused in this function.
        version: Forwarded to ``get_aggre_name``.

    Returns:
        List with zero or one object returned by ``get_sentence_setting``.
    """
    # Drop duplicates so that "exactly one minimum" can be tested by length.
    focus_array = list(set(cal_compare_min(focus_data)))
    compare_array = list(set(cal_compare_min(compare_data)))

    share_total = len(share_name)
    focus_min_name = get_aggre_name(focus_array, share_total, version)
    compare_min_name = get_aggre_name(compare_array, share_total, version)

    shares_a_minimum = any(item in compare_array for item in focus_array)
    one_each = len(focus_array) == 1 and len(compare_array) == 1
    if shares_a_minimum or not one_each:
        return []

    sentence = f"{focus_min_name} is the minimum of {focus_name}, while {compare_min_name} is minimum of {compare_name} "
    return [get_sentence_setting('compare_min', sentence)]
def get_same(data, focus_value, compare_value, focus_aggre_name,
             compare_aggre_name):
    """State that the focused values are roughly equal, quoting their mean.

    The mean of ``focus_value`` is taken with ``numpy.mean`` and truncated
    with ``int`` before being placed in the sentence.

    Args:
        data: Chart description; ``data['unit']`` is read.
        focus_value: Values whose mean is reported.
        compare_value: Unused in this function.
        focus_aggre_name: Display name of the focus group.
        compare_aggre_name: Unused in this function.

    Returns:
        One-element list holding the 'same' object returned by
        ``get_sentence_setting``.
    """
    text = f'The value of {focus_aggre_name} is almost the same, around {int(numpy.mean(focus_value))} {data["unit"]}'
    return [get_sentence_setting('same', text)]
def compare_sum(data,
                focus_data,
                compare_data,
                focus_name,
                compare_name,
                share_name,
                main_dimension,
                version='English'):
    """Build 'compare_sum' / 'compare_ave' / 'compare_val' sentences.

    ``cal_compare_sum`` is applied to both inputs and the results are compared
    with ``compare``, which yields a relation, a degree word, a scope marker
    and a ratio. Superlative sentences are produced first (only under the
    guard below), then ``relation`` is normalised to "... than" /
    "the same as" and the plain comparison sentences are produced.

    NOTE(review): this function was restored from a source that had lost its
    indentation. Nesting was inferred from where ``extreme`` is bound (every
    statement reading it is placed inside the guard that binds it) --
    confirm against the original layout.

    Args:
        data: Chart description; ``data[main_dimension]`` and
            ``data['title']`` are read.
        focus_data: Input handed to ``cal_compare_sum``; ``focus_data[0]`` is
            also measured with ``len`` for the average.
        compare_data: Input handed to ``cal_compare_sum``.
        focus_name: Display name of the focus group.
        compare_name: Display name of the compared group.
        share_name: Display name of what both groups have in common.
        main_dimension: Key into ``data`` of the dimension being compared.
        version: ``'Chinese'`` switches some sentences to Chinese wording.

    Returns:
        List of the objects returned by ``get_sentence_setting``.
    """
    focus_array = cal_compare_sum(focus_data)
    compare_array = cal_compare_sum(compare_data)
    sentences = []
    # NOTE(review): `range` shadows the builtin inside this function.
    relation, degree, range, ratio = compare(focus_array, compare_array)
    # Superlatives are only stated when the comparison holds for "all", the
    # focus is a single entry, and both groups span the whole main dimension.
    if range == "all" and len(focus_array) == 1 and len(
            data[main_dimension]) == len(focus_array) + len(compare_array):
        extreme = "highest"
        if ratio < 1:
            extreme = "lowest"
        # NOTE(review): the guard above requires len(focus_array) == 1, so
        # this `> 1` branch looks unreachable.
        if len(focus_array) > 1:
            if len(compare_array) + len(focus_array) > 2:
                sentence = f"{focus_name} has the {extreme} {len(focus_array)} sum value of {share_name}"
                sentences.append(get_sentence_setting('compare_sum', sentence))
                sentence = f"{focus_name} has the {extreme} {len(focus_array)} average value of {share_name}"
                sentences.append(get_sentence_setting('compare_ave', sentence))
                sentence = f"{focus_name} has the {extreme} {len(focus_array)} value of {share_name}"
                sentences.append(get_sentence_setting('compare_val', sentence))
            else:
                sentence = f"The sum value of {share_name} in {focus_name} is {degree} {relation} {compare_name}"
                sentences.append(get_sentence_setting('compare_sum', sentence))
                sentence = f"{data['title']} of {share_name} in {focus_name} is {degree} {relation} {compare_name}"
                sentences.append(get_sentence_setting('compare_ave', sentence))
                sentence = f"The sum value of {share_name} in {focus_name} is {degree} {relation} {compare_name}"
                sentences.append(get_sentence_setting('compare_val', sentence))
        else:
            if len(compare_array) > 1:
                sentence = f"{focus_name} has the {extreme} sum value of {share_name}"
                sentences.append(get_sentence_setting('compare_sum', sentence))
                sentence = f"{focus_name} has the {extreme} average value of {share_name}"
                sentences.append(get_sentence_setting('compare_ave', sentence))
                sentence = f"{focus_name} has the {extreme} value of {share_name}"
                sentences.append(get_sentence_setting('compare_val', sentence))
            else:
                # NOTE(review): `relation` has not yet been given its " than"
                # suffix at this point (that happens further down), so these
                # sentences may read "... is <degree> higher <name>" --
                # confirm whether that is intended.
                sentence = f"The sum value of {share_name} in {focus_name} is {degree} {relation} {compare_name}"
                sentences.append(get_sentence_setting('compare_sum', sentence))
                sentence = f"The value of {share_name} in {focus_name} is {degree} {relation} {compare_name}"
                sentences.append(get_sentence_setting('compare_ave', sentence))
                sentence = f"The sum value of {share_name} in {focus_name} is {degree} {relation} {compare_name}"
                sentences.append(get_sentence_setting('compare_val', sentence))
        if version == 'Chinese':
            # `extreme` is overwritten with its Chinese wording here, so the
            # English sentences below will embed the Chinese word as well.
            if extreme == 'highest':
                extreme = "大"
            else:
                extreme = "小"
            sentence = f"平均而言,{focus_name}是{share_name}中最{extreme}的"
            sentences.append(get_sentence_setting('compare_sum', sentence))
        if len(focus_array) == 1:
            sentence = f"{focus_name} has sum value of {focus_array[0]} and it's {extreme}"
            sentences.append(get_sentence_setting('compare_sum', sentence))
            sentence = f"{focus_name} has average value of {focus_array[0]/len(focus_data[0])} and it's {extreme}"
            sentences.append(get_sentence_setting('compare_ave', sentence))
            # NOTE(review): the same 'compare_ave' sentence is appended a
            # second time -- possibly meant to be 'compare_val'; confirm.
            sentence = f"{focus_name} has average value of {focus_array[0]/len(focus_data[0])} and it's {extreme}"
            sentences.append(get_sentence_setting('compare_ave', sentence))
    # From here on `relation` carries its connective ("than" / "as").
    if relation == 'higher' or relation == 'lower':
        relation = relation + " than"
    else:
        degree = "almost"
        relation = "the same as"
    if len(data[main_dimension]) == len(focus_array) + len(compare_array):
        if len(compare_array) > 1:
            sentence = f'The value of {focus_name} is {degree} {relation} others in {share_name}'
        else:
            sentence = f'The value of {focus_name} is {degree} {relation} {compare_name} in {share_name}'
        sentences.append(
            get_sentence_setting('compare_ave', sentence, sure=False))
    sentence = f"The sum value of {share_name} in {focus_name} is {degree} {relation} {compare_name}"
    if version == 'Chinese':
        # `relation_chinese` and `degree_chinese` are not defined in this
        # function; presumably module-level lookup tables -- verify.
        sentence = f"{share_name}的总和在{focus_name}中比{compare_name}{relation_chinese[relation]}{degree_chinese[degree]}"
    sentences.append(get_sentence_setting('compare_sum', sentence))
    sentence = f"{data['title']} of {focus_name} is {degree} {relation} {compare_name} in the category of {share_name}"
    sentences.append(get_sentence_setting('compare_ave', sentence, sure=False))
    return sentences