def test():
    """Copy rank/trust results from ``testRelationRaw.txt`` into the ``freebase`` table.

    Every line of the file is split on tabs into five fields: triplet, real
    relation, max-trust relation, rank and trust.  The n-th line (1-based)
    updates the row whose ``id`` equals n; the value written to the trust
    column is ``1 - trust``.  Each update is committed individually.
    """
    input_path = "testRelationRaw.txt"
    # NOTE(review): the column is spelled "trsut" in the statement; presumably
    # this matches the table schema -- confirm before "fixing" it.
    update_stmt = 'update freebase set rank_value="{}", trsut="{}", max_trust_relation="{}" where id={}'
    conn, cursor = init()
    with open(input_path, 'r') as handle:
        rows = handle.readlines()
    print(len(rows))

    row_id = 1
    for raw in rows:
        fields = raw.split('\t')
        # Pull all five columns up front so a short line fails before any output.
        columns = [fields[position] for position in range(5)]
        for column in columns:
            print(column)
        max_trust_relation, rank, trust = columns[2], columns[3], columns[4]

        statement = update_stmt.format(
            rank,
            str(1 - float(trust)),
            max_trust_relation,
            row_id,
        )
        print(statement)
        cursor.execute(statement)
        conn.commit()
        row_id += 1
        # NOTE(review): kept as a per-row progress print; confirm whether it
        # was meant to run once after the loop instead.
        print(row_id)
def get_entity_name(entity_id):
    """Return the name recorded for *entity_id* in ``freebase_entity_id``.

    :param entity_id: identifier to look up; it is concatenated into the
        statement, so it has to be a ``str``.
    :return: column 1 of the first matching row, or ``""`` when nothing matches.
    """
    # NOTE(review): the id is spliced into the SQL text unescaped; only pass
    # trusted values here.
    query = "select * from freebase_entity_id where entity_id=" + entity_id
    conn, cursor = init()  # initialise the MySQL connection
    cursor.execute(query)
    matches = cursor.fetchall()
    return matches[0][1] if len(matches) != 0 else ""
def get_weibo_user_follow(user_id=''):
    """Query the accounts followed by *user_id*.

    :param user_id: Weibo user id; an empty string is rejected.
    :return: a ``{'message': ...}`` dict when the id is empty or the query
        yields no rows; otherwise ``None`` (the fetched rows are walked but
        nothing is built from them yet).
    """
    if user_id == '':
        prompt = {'message': '请输入user_id'}
        return prompt

    conn, cursor = init()
    query = user_follow_template.format(user_id)
    cursor.execute(query)
    followed = cursor.fetchall()
    if len(followed) == 0:
        return {'message': '该用户关注的人为0'}

    # Placeholder loop: rows are not processed yet.
    for _row in followed:
        continue
    return None
def advogato_data_KG_source(kind, entity, level):
    """Build a force-graph description of the relations that start at *entity*.

    :param kind: ``'freebase'`` queries the ``freebase`` table; any other value
        uses the module-level ``source_template``.
    :param entity: source entity whose outgoing relations are fetched.
    :param level: traversal depth; ``level - 1`` is handed to ``helper`` for
        the remaining levels.
    :return: dict with keys ``type``, ``categories``, ``nodes`` and ``links``.
        Link ``source``/``target`` values are positions in the node list.
    """
    source_sql = source_template
    if kind == 'freebase':
        source_sql = "select * from freebase where source_entity='{}'"

    conn, cursor = init()
    cursor.execute(source_sql.format(entity))
    results = cursor.fetchall()

    no_repeat_categories = ['指定实体', '关联实体']
    category_to_int = {"指定实体": 0, "关联实体": 1}
    print(len(results))
    print(no_repeat_categories)

    contents = {
        "type": "force",  # attribute: type
        "categories": [
            {'name': category, 'keyword': {}, 'base': category}
            for category in no_repeat_categories
        ],
    }
    print(contents)

    entity01 = {}
    nodes = []
    links = []
    real_nodes = []

    def register(name):
        # Add a node the first time a name is seen; the queried entity gets
        # its own category.
        if name in real_nodes:
            return
        real_nodes.append(name)
        category = "指定实体" if name == entity else "关联实体"
        nodes.append({
            'name': name,
            'value': str(name),
            'category': category_to_int[category],
        })

    per_level_nodes = 0
    for row in results:
        source_node = row[1]
        target_node = row[2]
        entity01[source_node] = 1  # already queried
        entity01[target_node] = 0  # not queried yet
        register(source_node)
        register(target_node)
        links.append({
            'source': real_nodes.index(source_node),
            'target': real_nodes.index(target_node),
            'value': row[3],
        })
        per_level_nodes += 1
        if per_level_nodes > max_nodes_per_level:
            break

    # Pass the template that was actually selected above; handing over the
    # module default would expand 'freebase' graphs with the wrong query.
    # NOTE(review): assumes helper formats its last argument with an entity
    # name exactly like the query above -- confirm against helper.
    helper(entity01, nodes, level - 1, conn, cursor, links, category_to_int,
           real_nodes, source_sql)

    contents['nodes'] = list(nodes)
    contents['links'] = list(links)
    return contents
def advogato_data_KG_target(kind, entity):
    '''
    Collect up to three levels of relation edges around *entity*.

    Currently of little use.

    :param kind: ``'source'`` runs ``source_template`` for the first level,
        anything else runs ``target_template``; deeper levels always use
        ``source_template``.
    :param entity: entity the first query is formatted with.
    :return: list of edge dicts with keys ``source``, ``target``, ``type``
        and ``rela``.
    '''
    conn, cursor = init()
    first_query = (
        source_template.format(entity)
        if kind == 'source'
        else target_template.format(entity)
    )
    cursor.execute(first_query)
    results = cursor.fetchall()
    print(len(results))

    nodes = []  # final result

    def harvest(rows, seen):
        # Append one edge per row and stop scanning these rows once the
        # running count passes the per-level cap.
        targets = []
        for row in rows:
            nodes.append({
                'source': row[1],
                'target': row[2],
                'type': "resolved",
                'rela': row[3],
            })
            targets.append(row[2])
            seen += 1
            if seen > max_nodes_per_level:
                break
        return targets, seen

    def expand(frontier):
        # Query the outgoing edges of every entity in the frontier; the
        # counter is shared across the whole level.
        reached = []
        seen = 0
        for origin in frontier:
            cursor.execute(source_template.format(origin))
            found, seen = harvest(cursor.fetchall(), seen)
            reached.extend(found)
        return reached

    seconds, _ = harvest(results, 0)
    thirds = expand(seconds)
    expand(thirds)

    print(nodes)
    return nodes
def load_entity2id_into_mysql(file=path + "/freebase/entity2id.txt", sp='\t'):
    """Insert every ``name<sp>id`` line of *file* into ``freebase_entity_id``.

    :param file: path of the mapping file; defaults to ``entity2id.txt`` under
        the module-level ``path``.
    :param sp: column separator.

    The line terminator is stripped before splitting so the last column is not
    stored with a trailing newline, and empty lines are skipped.
    """
    with open(file, 'r') as data:
        lines = data.readlines()

    cnt = 0
    conn, cursor = init()  # initialise the MySQL connection
    # NOTE(review): values are spliced into the statement unescaped; a name
    # containing a double quote will break the insert.
    sql = 'insert into freebase_entity_id (entity_name, entity_id) values ("{}","{}")'
    for line in lines:
        record = line.rstrip('\r\n')
        if not record:
            continue  # nothing to insert for a blank line
        contents = record.split(sp)
        entity_name = contents[0]
        entity_id = contents[1]
        print(entity_name, entity_id)
        cnt = cnt + 1
        statement = sql.format(entity_name, entity_id)
        execute(cursor, conn, statement)
        print(statement)
        print(cnt)
def get_weibo_user_profile(user_id=''):
    """Return the formatted profile text for a Weibo user.

    :param user_id: Weibo user id; an empty string is rejected.
    :return: ``user_profile_info`` formatted with columns 1-5 of the first
        matching row, or a ``{'message': ...}`` dict when the id is empty or
        no row matches.
    """
    if user_id == '':
        return {'message': '请输入user_id'}

    conn, cursor = init()
    cursor.execute(user_profile_template.format(user_id))
    rows = cursor.fetchall()
    if len(rows) == 0:
        # Report the miss instead of failing further down.
        return {'message': '未找到该用户'}

    # Always format a single row, even if the query returns several.
    row = rows[0]
    return user_profile_info.format(row[1], row[2], row[3], row[4], row[5])
def load_into_mysql(file=path + "/freebase/test.txt", sp='\t'):
    """Insert every ``source<sp>target<sp>relation`` line of *file* into ``freebase``.

    :param file: path of the triple file; defaults to ``test.txt`` under the
        module-level ``path``.
    :param sp: column separator.

    The line terminator is stripped before splitting so the last column is not
    stored with a trailing newline, and empty lines are skipped.
    """
    with open(file, 'r') as data:
        lines = data.readlines()

    cnt = 0
    conn, cursor = init()  # initialise the MySQL connection
    # NOTE(review): values are spliced into the statement unescaped; a value
    # containing a double quote will break the insert.
    sql = 'insert into freebase (source_entity, target_entity, relation) values ("{}","{}","{}")'
    for line in lines:
        record = line.rstrip('\r\n')
        if not record:
            continue  # nothing to insert for a blank line
        contents = record.split(sp)
        source = contents[0]
        target = contents[1]
        relation = contents[2]
        print(source, target, relation)
        cnt = cnt + 1
        statement = sql.format(source, target, relation)
        execute(cursor, conn, statement)
        print(statement)
        print(cnt)
def load_data_into_mysql():
    """Load the space-separated ``advogato`` edge file into the database.

    Each line supplies source, target and relation.  Edges whose source equals
    their target are skipped; every other edge is formatted into the
    module-level ``sql_template`` and executed.
    """
    edge_file = "advogato"
    summary = "source = {}, target = {}, and relation = {}"
    inserted = 0
    conn, cursor = init()
    with open(edge_file, 'r') as handle:
        for raw in handle:
            parts = raw.strip().split(" ")
            # Index all three columns first so a short line fails before the
            # self-loop check.
            source, target, relation = parts[0], parts[1], parts[2]
            if source != target:
                print(summary.format(source, target, relation))
                statement = sql_template.format(source, target, relation)
                execute(cursor, conn, statement)
                inserted += 1
                # NOTE(review): kept as a per-row progress print; confirm
                # whether a single total after the loop was intended.
                print(inserted)
def get_freebase_info(source, target):
    """Fetch rank/trust details for the relation between two entity ids.

    Both ids are first resolved to names with ``get_entity_name``; the
    ``freebase`` table is then searched for a row with that source and target.

    :return: ``{}`` when no row matches, otherwise a dict with ``rank``,
        ``trust`` and ``max_trust_relation`` taken from columns 4, 5 and 6 of
        the first matching row.
    """
    source_name = get_entity_name(source)
    target_name = get_entity_name(target)
    query = "select * from freebase where source_entity='{}' and target_entity='{}'".format(
        source_name, target_name
    )
    conn, cursor = init()  # initialise the MySQL connection
    print(query)
    cursor.execute(query)
    rows = cursor.fetchall()
    if len(rows) == 0:
        print("hahahahha")
        return {}

    first = tuple(rows[0])
    print("content:", first)
    details = {
        'rank': first[4],
        'trust': first[5],
        'max_trust_relation': first[6],
    }
    print(details)
    return details
def generate_weibo_user_graph(user_id=''):
    """Assemble a force-graph description of a Weibo user.

    The graph holds the user, three counter nodes (posts, following,
    followers), one node per followed account and, for every post, a content
    node plus six attribute nodes (location, time, likes, reposts, comments,
    publishing tool).

    :param user_id: Weibo user id; an empty string is rejected.
    :return: ``{'message': ...}`` when the id is empty, ``("", "")`` when no
        profile row is found, otherwise ``(contents, name)`` where *contents*
        has the keys ``type``, ``categories``, ``nodes`` and ``links`` and
        *name* is column 1 of the profile row.
    """
    if user_id == '':
        return {'message': '请输入user_id'}

    # Categories; a category's position in this list is its integer id.
    no_repeat_categories = [
        '关注数量', '粉丝', '微博内容数量', '原创微博', '转发微博',
        '微博发布地点', '微博发布时间', '微博点赞数', '微博转发数',
        '微博评论数', '微博发布工具', '关注', '微博检索用户',
    ]
    category_to_int = {
        label: position for position, label in enumerate(no_repeat_categories)
    }
    contents = {
        "type": "force",
        "categories": [
            {'name': label, 'keyword': {}, 'base': label}
            for label in no_repeat_categories
        ],
    }

    nodes = []
    links = []
    real_nodes = []  # node names in insertion order; links refer to positions here

    def add_node(name, value, category):
        nodes.append({
            'name': name,
            'value': value,
            'category': category_to_int[category],
        })

    def add_link(origin, destination, value):
        links.append({
            'source': real_nodes.index(origin),
            'target': real_nodes.index(destination),
            'value': value,
        })

    # user profile
    conn, cursor = init()
    cursor.execute(user_profile_template.format(user_id))
    profile = cursor.fetchall()
    if len(profile) == 0:
        return "", ""
    if len(profile) == 1:
        profile = profile[0]

    user_name = profile[1]
    add_node(user_name, user_name, '微博检索用户')
    real_nodes.append(user_name)

    add_node('微博内容数量', profile[3], '微博内容数量')
    add_node('关注数量', profile[4], '关注数量')
    add_node('粉丝', profile[5], '粉丝')
    real_nodes.extend(['微博内容数量', '关注数量', '粉丝'])

    add_link(user_name, '关注数量', profile[4])
    add_link(user_name, '粉丝', profile[5])
    add_link(user_name, '微博内容数量', profile[3])

    # user follow
    cursor.execute(user_follow_template.format(user_id))
    for follow in cursor.fetchall():
        follow_name = follow[3]
        follow_link = follow[4]
        add_node(follow_name, follow_link, '关注')
        real_nodes.append(follow_name)
        add_link('关注数量', follow_name, follow_link)

    # user contents
    cursor.execute(weibo_contents_template.format(user_id))
    for cnt, content in enumerate(cursor.fetchall(), start=1):
        # Read every column before touching the graph.
        text, position, posted_at, likes, reposts, comments, tool = [
            content[column] for column in range(3, 10)
        ]
        suffix = str(cnt)

        post_name = '微博内容' + suffix
        real_nodes.append(post_name)
        # Posts whose text carries a repost reason are classed as reposts.
        post_category = '转发微博' if '转发理由' in text else '原创微博'
        add_node(post_name, text, post_category)
        add_link('微博内容数量', post_name, text)

        attributes = (
            ('微博发布地点', position),
            ('微博发布时间', posted_at),
            ('微博点赞数', likes),
            ('微博转发数', reposts),
            ('微博评论数', comments),
            ('微博发布工具', tool),
        )
        for label, value in attributes:
            attribute_name = label + suffix
            real_nodes.append(attribute_name)
            add_node(attribute_name, value, label)
            add_link(post_name, attribute_name, value)

    contents['nodes'] = nodes
    contents['links'] = links
    return contents, profile[1]
print(rank) print(trust) sql = sql_template.format(rank, str(1 - float(trust)), max_trust_relation, cnt) print(sql) cursor.execute(sql) conn.commit() cnt += 1 print(cnt) if __name__ == '__main__': file = "testRelationRaw.txt" cnt = 1 sql_template = 'update freebase set rank_value="{}", trsut="{}", max_trust_relation="{}" where id={}' conn, cursor = init() with open(file, 'r') as data: lines = data.readlines() print(len(lines)) for line in lines: contents = line.split('\t') triplet = contents[0] real_relation = contents[1] max_trust_relation = contents[2] rank = contents[3] trust = contents[4] print(triplet) print(real_relation) print(max_trust_relation) print(rank) print(trust)