def compute_updating_relation(self, obj):
    """Build relation records between ``obj`` and similar nodes of the network.

    ``obj`` is compared against every other entry of ``self.list_network``
    with ``Levenshtein.ratio``. The text used for ``obj`` is its content; the
    text used for a network node is its content followed by its title. When a
    text is empty, ``str(get_keywords())`` of the same object is used instead.
    Every node whose ratio is at least ``self.ratio`` yields one record.

    Args:
        obj: The newly inserted object. It must provide the same getters as
            the network nodes (``get_content``, ``get_title``,
            ``get_keywords``, ``get_id``, ``get_activity``, ``get_service``).

    Returns:
        A list of dicts, one per matching node, with the keys ``pre_*``
        (taken from the existing node), ``post_*`` (taken from ``obj``) and
        ``relation`` (the value returned by ``cm.get_relation``). The list is
        empty when nothing reaches the threshold.
    """
    # The text of the new object does not depend on the node it is compared
    # with, so it is built once instead of on every iteration.
    new_text = obj.get_content()
    if new_text is None or new_text == "":
        new_text = str(obj.get_keywords())

    relations = []
    for node in self.list_network:
        # Never relate an object to itself.
        if node is obj:
            continue

        # A missing content or title is treated as an empty string; plain
        # concatenation would raise TypeError on None before the keyword
        # fallback below could apply.
        node_text = (node.get_content() or "") + (node.get_title() or "")
        if not node_text:
            node_text = str(node.get_keywords())

        # Similarity of the two texts; below the threshold there is no link.
        rate = Levenshtein.ratio(new_text, node_text)
        if rate < self.ratio:
            continue

        # The existing node is the "pre" side, the new object the "post" side.
        relation = cm.get_relation(node.__class__, obj.__class__)
        record = {
            "pre_id": node.get_id(),
            "relation": relation,
            "post_id": obj.get_id(),
            "pre_Class": node.__class__.__name__,
            "post_Class": obj.__class__.__name__,
            "pre_Activity": node.get_activity(),
            "post_Activity": obj.get_activity(),
            "pre_service": node.get_service(),
            "pre_title": node.get_title(),
            "pre_content": node.get_content(),
            "post_service": obj.get_service(),
            "post_title": obj.get_title(),
            "post_content": obj.get_content(),
        }
        # Relations other than "Update" are echoed to stdout.
        if relation != "Update":
            print("relation:", record)
        relations.append(record)
    return relations
# NOTE(review): another ``initial_data_status`` definition appears later in
# this source; if both live in the same class the later one replaces this one.
def initial_data_status(self):
    """Load the first ``self.k_no`` records into the network and link them.

    Records come from ``self.collection.find()``. Each one is turned into an
    object by ``self.create_class_obj`` and appended to ``self.list_network``.
    From the second record on, the new object is compared with every other
    node using ``Levenshtein.ratio``; each pair whose ratio is at least
    ``self.ratio`` adds a relation dict to ``self.list_network_relation``.
    Afterwards the nodes, the relations and the final counter are printed.

    Returns:
        None. Results are stored in ``self.list_network`` and
        ``self.list_network_relation``.
    """
    counter = 0  # number of records loaded so far

    for data in self.collection.find():
        # Stop once the initial window of k_no records is filled.
        if counter >= self.k_no:
            break

        obj = self.create_class_obj(data)
        self.list_network.append(obj)
        counter += 1
        print("counter:", counter)

        # A single node has nothing to be compared with.
        if counter < 2:
            continue

        # The text of the new object is the same for every comparison.
        new_text = obj.get_content()
        if new_text is None or new_text == "":
            new_text = str(obj.get_keywords())

        for node in self.list_network:
            if node is obj:
                continue

            # A missing content or title is treated as an empty string;
            # plain concatenation would raise TypeError on None before the
            # keyword fallback below could apply.
            node_text = (node.get_content() or "") + (node.get_title() or "")
            if not node_text:
                node_text = str(node.get_keywords())

            print("original_content:", new_text)
            print("iter_cont:", node_text)

            # Similarity of the two texts.
            rate = Levenshtein.ratio(new_text, node_text)
            print("rate:", rate)

            if rate >= self.ratio:
                # The existing node is the "pre" side, the new object the
                # "post" side of the stored relation.
                relation = cm.get_relation(node.__class__, obj.__class__)
                self.list_network_relation.append({
                    "pre_id": node.get_id(),
                    "relation": relation,
                    "post_id": obj.get_id(),
                    "pre_Class": node.__class__.__name__,
                    "post_Class": obj.__class__.__name__,
                    "pre_Activity": node.get_activity(),
                    "post_Activity": obj.get_activity(),
                })

    # Dump the nodes.
    if self.list_network:
        for index, node in enumerate(self.list_network):
            print("i:", index, ", list_network:", node)
    else:
        print("list_network is empty!")

    # Dump the relations.
    if self.list_network_relation:
        for index, record in enumerate(self.list_network_relation):
            print("i:", index, ", list_network:", record)
    else:
        print("list_network_relation is empty!")

    print("counter:", counter)
def initial_data_status(self):
    """Load ``self.k_no`` records into the network, link them and print a dump.

    Records come from ``self.collection.find().skip(self.kip)``. Each one is
    turned into an object by ``self.create_class_obj`` and appended to
    ``self.list_network``. The record at position ``self.k_no - 1`` is also
    passed to ``self.initialize_linked_status`` and the result is stored in
    ``self.flag``. From the second record on, the new object is compared with
    every other node using ``Levenshtein.ratio``; each pair whose ratio is at
    least ``self.ratio`` adds a relation dict to
    ``self.list_network_relation``. Finally the nodes, the relations, the
    entries of ``self.position`` and the counter are printed.

    Returns:
        None. Results are stored in ``self.list_network``,
        ``self.list_network_relation`` and ``self.flag``.
    """
    counter = 0  # number of records loaded so far

    # NOTE(review): the attribute is spelled ``kip`` in the source; confirm it
    # is the intended name of the skip offset.
    cursor = self.collection.find().skip(self.kip)

    for data in cursor:
        # Stop once the initial window of k_no records is filled.
        if counter >= self.k_no:
            break

        # The last record of the window sets the initial LinkedIn user status.
        if counter == self.k_no - 1:
            self.flag = self.initialize_linked_status(data)

        # NOTE(review): the source layout was flattened; confirm this print
        # is meant for every record rather than only for the last one.
        print("service:", data.get("服务ID"))

        obj = self.create_class_obj(data)
        self.list_network.append(obj)
        counter += 1
        print("counter:", counter)

        # A single node has nothing to be compared with.
        if counter < 2:
            continue

        # The text of the new object is the same for every comparison.
        new_text = obj.get_content()
        if new_text is None or new_text == "":
            new_text = str(obj.get_keywords())

        for node in self.list_network:
            if node is obj:
                continue

            # A missing content or title is treated as an empty string;
            # plain concatenation would raise TypeError on None before the
            # keyword fallback below could apply.
            node_text = (node.get_content() or "") + (node.get_title() or "")
            if not node_text:
                node_text = str(node.get_keywords())

            # Similarity of the two texts; below the threshold there is no
            # link to store.
            rate = Levenshtein.ratio(new_text, node_text)
            if rate < self.ratio:
                continue

            # The existing node is the "pre" side, the new object the "post"
            # side of the stored relation.
            relation = cm.get_relation(node.__class__, obj.__class__)
            self.list_network_relation.append({
                "pre_id": node.get_id(),
                "relation": relation,
                "post_id": obj.get_id(),
                "pre_Class": node.__class__.__name__,
                "post_Class": obj.__class__.__name__,
                "pre_Activity": node.get_activity(),
                "post_Activity": obj.get_activity(),
                "pre_service": node.get_service(),
                "pre_content": node.get_content(),
                "pre_title": node.get_title(),
                "post_service": obj.get_service(),
                "post_content": obj.get_content(),
                "post_title": obj.get_title(),
            })

    # 1. Nodes
    print("节点:")
    if self.list_network:
        for index, node in enumerate(self.list_network):
            print("i:", index, ", list_network:", node)
    else:
        print("list_network is empty!")

    # 2. Relations
    print("联系")
    if self.list_network_relation:
        for index, record in enumerate(self.list_network_relation):
            print("i:", index, ", list_network:", record)
    else:
        print("list_network_relation is empty!")

    # 3. User position data (LinkedIn). The index is printed after "i:" just
    # like in the two dumps above; it used to be left out of the output.
    if self.position:
        for index, entry in enumerate(self.position):
            print("i:", index, ", position:", entry)
    else:
        print("list_position is Empty!")

    print("counter:", counter)