def cal_sim_pos(sent_a, sent_b):
    """
    Compute the part-of-speech similarity between two sentences.

    Each sentence is tagged with ``sentence_utils.get_pos``. The tagged items
    are reduced to a mapping from each item's second field (presumably the
    POS tag -- confirm against ``get_pos``) to its relative frequency. The
    score is the cosine of the angle between the two frequency vectors.

    :param sent_a: first sentence, handed to ``sentence_utils.get_pos``
    :param sent_b: second sentence, handed to ``sentence_utils.get_pos``
    :return: dot product of the two frequency vectors divided by the product
        of their Euclidean norms, or ``0`` when that product is zero
    """

    def tag_frequencies(tagged):
        # Each tagged item contributes 1/len(tagged) to its tag's bucket.
        # The division stays inside the loop so an empty input never divides.
        freq = {}
        for item in tagged:
            tag = item[1]
            freq[tag] = freq.get(tag, 0.0) + 1.0 / len(tagged)
        return freq

    def euclidean_norm(freq):
        # Plain running sum on purpose: keeps float rounding identical to a
        # simple left-to-right accumulation.
        squares = 0.0
        for value in freq.values():
            squares += value ** 2
        return math.sqrt(squares)

    tagged_a = sentence_utils.get_pos(sent_a)
    tagged_b = sentence_utils.get_pos(sent_b)
    freq_a = tag_frequencies(tagged_a)
    freq_b = tag_frequencies(tagged_b)

    # Numerator: only tags that occur in both sentences contribute.
    dot = 0.0
    for tag, weight in freq_a.items():
        if tag not in freq_b:
            continue
        dot += weight * freq_b[tag]

    # Denominator: product of the two vector lengths.
    norm_product = euclidean_norm(freq_a) * euclidean_norm(freq_b)
    if norm_product == 0:
        return 0
    return dot / norm_product
def get_graph_map_by_json(data_json, is_train=False):
    """
    Build an undirected adjacency map from the graph described by ``data_json``.

    Every edge adds each endpoint to the other's neighbor list. The first time
    a node id is seen, an entry is created for it with these keys:

    * ``content``   -- second ``'-'``-separated field of the node's label
    * ``neighbors`` -- adjacent node ids, one item per edge (repeated edges
      therefore yield repeated neighbors)
    * ``label``     -- ``'0'`` or ``'1'``, picked by ``random.choice``
    * ``datetime``  -- the node's own timestamp, parsed by ``arrow`` in the
      local time zone
    * ``feature``   -- ``{'pos': get_pos(content)}``

    :param data_json: mapping with ``'edges'`` (items carrying ``'source'``
        and ``'target'``), ``'nodes'`` (indexed by ``int(node_id)``, each with
        a ``'label'``) and ``'datetimes'`` (indexed by node id)
    :param is_train: currently unused; kept so existing callers keep working
    :return: dict mapping node id to the entry described above
    """

    def new_node(node_id, first_neighbor):
        # Everything is derived from the node's OWN id. The target endpoint
        # previously reused the source's timestamp and POS features and
        # dropped the local time zone, so the two endpoints disagreed.
        content = data_json['nodes'][int(node_id)]['label'].split('-')[1]
        return {
            'content': content,
            'neighbors': [first_neighbor],
            'label': random.choice(['0', '1']),
            'datetime': arrow.get(data_json['datetimes'][node_id],
                                  u'YYYY年MM月DD日 HH:mm',
                                  tzinfo=tz.tzlocal()),
            'feature': {'pos': get_pos(content)},
        }

    graph_map = {}
    for edge in data_json['edges']:
        source = edge['source']
        target = edge['target']
        # Handle the edge in both directions; source first so node creation
        # (and the random label draw) happens in edge order.
        for node_id, other in ((source, target), (target, source)):
            if node_id in graph_map:
                graph_map[node_id]['neighbors'].append(other)
            else:
                graph_map[node_id] = new_node(node_id, other)
    return graph_map