def process_query(query_type, graph_type, input_path, output_path, seeds=None,
                  c=0.15, epsilon=1e-9, max_iters=100, handles_deadend=True):
    '''
    Process a query to obtain a score vector w.r.t. the seeds

    inputs
        query_type : str
            type of querying {'rwr', 'ppr', 'pagerank'}
        graph_type : str
            type of graph {'directed', 'undirected', 'bipartite'}
        input_path : str
            path for the graph data
        output_path : str
            path for storing an RWR score vector
        seeds : int | list | str | None
            seeds for query
                - 'rwr' : just a nonnegative integer
                - 'ppr' : list of nonnegative integers or file path
                - 'pagerank' : None, seeds = range(0, n)
            default None is normalized to [] (the old default was a mutable
            `seeds=[]`, which is the classic shared-default pitfall)
        c : float
            restart probability
        epsilon : float
            error tolerance for power iteration
        max_iters : int
            maximum number of iterations for power iteration
        handles_deadend : bool
            if true, it will handle the deadend issue in power iteration
            otherwise, it won't, i.e., no guarantee for sum of RWR scores
            to be 1 in directed graphs
            NOTE(review): this flag is accepted but never forwarded to any
            compute() call below — confirm whether the solvers read it.

    outputs
        r : ndarray
            RWR score vector (also written to output_path)

    raises
        TypeError  : 'rwr' query with a non-int seed
        ValueError : unknown query_type
    '''
    # Normalize the sentinel default; preserves the old `seeds=[]` behavior
    # for callers that rely on the default.
    if seeds is None:
        seeds = []
    if query_type == 'rwr':
        if type(seeds) is not int:
            raise TypeError('Seeds should be a single integer for RWR')
        rwr = RWR()
        rwr.read_graph(input_path, graph_type)
        r = rwr.compute(int(seeds), c, epsilon, max_iters)
    elif query_type == 'ppr':
        seeds = get_seeds(seeds)
        ppr = PPR()
        ppr.read_graph(input_path, graph_type)
        r = ppr.compute(seeds, c, epsilon, max_iters)
    elif query_type == 'pagerank':
        pagerank = PageRank()
        pagerank.read_graph(input_path, graph_type)
        r = pagerank.compute(c, epsilon, max_iters)
    else:
        # Previously an unknown query_type fell through to write_vector with
        # `r` unbound, raising a confusing NameError; fail fast instead.
        raise ValueError(
            "query_type must be one of 'rwr', 'ppr', 'pagerank'; got %r"
            % (query_type,))
    write_vector(output_path, r)
    # The docstring promises the score vector as output; return it as well.
    return r
def rwr2(seeds):
    """Run a random walk with restart from `seeds` over the graph stored in
    'path2.txt' and return the scores of a fixed set of known articles.

    inputs
        seeds : int (or whatever RWR.compute accepts as `seed`)
            seed node(s) for the walk
    outputs
        A : dict
            article title -> RWR score, restricted to ids in `word_id` and
            to scores above 1e-12
    """
    # Node id -> Wikipedia article title.
    dic = {2010784: 'Ivor Grattan-Guinness', 286788: 'Galilean invariance', 59958: 'Power series', 6513985: 'Mass in general relativity', 266950: "Galileo's ship", 275603: 'Frank Wilczek', 28305: 'String theory', 991: 'Absolute value', 295183: 'Galilean transformation', 33445862: 'Center of momentum frame', 61335: 'Differentiation', 644550: 'Higgs mechanism', 1410595: 'Field equation', 308: 'Aristotle', 18947: 'Metre', 14627: 'Isaac Newton', 26884: 'Superconductivity', 38318895: 'Bra-ket notation', 8429: 'Density', 857235: 'Equivalence principle', 244611: "Newton's law of universal gravitation", 25098: 'Phase velocity', 711862: 'Reissner–Nordström metric', 339024: 'Length contraction', 17553: "Kepler's laws of planetary motion", 288123: 'Worcester Polytechnic Institute', 531104: 'Gravity Probe B', 82728: 'Quantum superposition', 494418: 'Proper time', 12024: 'General relativity', 56683807: 'Shapiro delay', 10902: 'Force', 169552: 'Fifth force', 5961: 'Cognitive psychology', 30876419: 'Quantum state', 2053742: 'Contact force', 25525: 'René Descartes', 34753458: "Gauss' law for gravity", 610202: 'Fine structure', 60828: 'Lepton', 256662: 'Terminal velocity', 11051917: 'Günter Nimtz', 172466: 'Jean Buridan', 51079: 'Magnet', 9604: 'Many-worlds interpretation', 3406142: 'Relativity of simultaneity', 327127: 'John Archibald Wheeler', 76954: 'Parallel', 30400: 'Torque', 26764: 'International System of Units', 19555586: 'Classical mechanics', 74263: 'Frame of reference', 27808: 'Samuel Pepys', 9649: 'Energy', 149861: 'Work (physics)', 23536538: 'University of Wisconsin–Madison', 28736: 'Speed of light', 54386990: 'Absolute time and space', 5826: 'Complex number', 9723174: 'Irwin I. Shapiro', 7954422: 'Cornell University', 48781: 'Philosophiæ Naturalis Principia Mathematica', 495065: 'Louis Essen', 27709: 'Semiconductor', 25219: 'Quanta', 25202: 'Quantum mechanics', 22393: 'Organelle', 37892: 'Thrust', 5155121: 'Richard S. Westfall', 17902: 'Leonhard Euler', 2274: 'Arthur Eddington', 29688374: 'Galileo Galilei', 36001020: 'Gravitational wave detector', 1686861: 'Aether drag hypothesis', 12286: 'Great Plague of London', 5176: 'Calculus', 189951: 'Force carrier', 24577221: 'De Historia piscium', 191734: 'Godfrey Kneller', 55212: "Newton's laws of motion", 19593829: 'Spin (physics)', 174593: 'Socinianism', 1979961: "Torricelli's equation", 276582: 'Ricci curvature', 176931: 'Internet Archive', 6956: 'Conservation law', 174396: 'Bohr radius', 10779: 'Frequency', 561845: 'Jacob Bekenstein', 51624: 'Thomas Young (scientist)', 1135324: 'Conjugate variables', 606874: 'Einstein–Cartan theory', 719601: 'MIT Press', 187408: 'Self-adjoint operator', 154739: 'Electrodeposition', 312881: 'Action (physics)', 33931: 'Weight', 458558: 'List of scientific laws named after people', 426219: 'Classical electromagnetism', 22939: 'Physics', 26998617: 'Field (physics)', 25453985: 'Atomic clock', 2434383: 'Local reference frame', 4795569: 'Hypotheses non fingo', 28748: 'Speed', 37750448: 'Taub-NUT vacuum', 643769: 'Quantum tunnelling', 34083818: 'Entailment', 145343: 'Wave function', 9505941: 'List of quantum-mechanical systems with analytical solutions', 19694: 'Mercury (planet)', 21210: 'Niels Bohr', 239290: 'John Wallis', 10890: 'Fundamental interaction', 72540: 'Newton (unit)', 3691915: '0 (number)', 32533: 'Euclidean vector', 45489756: 'Gravitation', 1251967: 'Princeton University Press', 49210145: 'Coriolis effect', 47641: 'Standard Model', 579929: 'Rest (physics)', 15314901: 'Proper velocity', 33692268: 'List of topics named after Leonhard Euler', 26998547: 'Degrees of freedom (physics and chemistry)', 22222481: "Euler's laws of motion", 2217599: 'Circular symmetry', 1912367: 'Electromagnetic tensor', 204680: 'Four-momentum', 74327: 'Principle of relativity', 1072289: 'Uniform circular motion', 1949268: 'Aether (classical element)', 230488: 'Minkowski space', 30731: 'Teleological argument', 852089: 'Gravitational time dilation', 297839: 'Time dilation', 2288549: 'Momentum operator', 3071186: 'Gravitational acceleration', 26962: 'Special relativity', 22308: 'Oxford', 150159: "Noether's theorem", 19265670: 'Centrifugal force', 2218355: 'Astronomia nova', 2434557: 'Non-inertial reference frame', 14838: 'Inertial frame of reference', 475008: 'Stiffness', 1968588: 'Hypervelocity', 49720: 'Robert Hooke', 182727: "Mach's principle", 2916607: 'Force field (physics)', 265006: 'Dialogue Concerning the Two Chief World Systems', 473514: 'Generalized coordinates', 31429: 'Twin paradox', 9228: 'Earth', 226841: 'Four-acceleration', 2443: 'Acceleration', 12320384: 'Theory of impetus', 474962: 'Translation (physics)', 67088: 'Conservation of energy', 1111581: 'Reaction (physics)', 10886039: 'SunPower', 47922: 'Determinism', 25267: 'Quantum field theory', 20431: 'Momentum', 2413037: 'Isolated system', 240123: 'Plasticity (physics)', 308815: 'Connection (mathematics)', 19555: 'Molecule', 19048: 'Mass', 18404: 'Lorentz transformation', 2839: 'Angular momentum', 11132342: 'Bodmer Library', 33306: 'Water', 455769: 'Rigid body', 5390: 'Conversion of units', 157550: 'Karl Schwarzschild', 25312: 'Quantum gravity', 20491903: 'Velocity', 19737: "Maxwell's equations", 15221133: 'Euler force', 593693: 'Point (geometry)', 211922: 'Impulse (physics)', 18993869: 'Gas', 1209823: 'Rotating reference frame', 17939: 'Light', 692093: 'Oxford University Museum of Natural History', 30214333: '1st Earl of Halifax', 19194778: 'Deformation (mechanics)', 41026: 'Dielectric', 415513: 'Net force', 18589032: 'Particle accelerator', 36269934: 'Geodesic (general relativity)', 28486339: 'Georgia Institute of Technology', 13989702: 'Wolfram Demonstrations Project', 102338: 'Henry Cavendish', 1911511: 'Lorentz scalar', 1025272: 'Bohr–Einstein debates', 946273: 'Harry Ransom Center', 9532: 'Electromagnetism', 5918: 'Continuum mechanics', 20580: 'Motion (physics)', 14909: 'Inertia', 17384910: 'Observer (special relativity)', 70671: 'Stress–energy tensor', 17730: 'Latin', 48991805: 'Contributors to general relativity', 323592: 'Nicolaus Copernicus', 226829: 'Four-velocity', 33130: 'Werner Heisenberg', 38293253: 'Geodesic deviation equation', 19559: 'Mechanics', 4946686: 'Relative velocity', 27979: 'Sunlight', 11529: 'Fermion', 32498: 'Volume', 40170957: 'Action at a distance (physics)', 1201321: 'Superposition principle', 1126641: 'Invariant (physics)', 55442288: 'Constant factor rule in differentiation', 17327: 'Kinetic energy', 23912155: 'Gauge theory', 4838571: 'Position operator', 45151465: 'Natural phenomenon', 207833: 'Radial velocity', 291928: 'Operator (physics)', 198319: 'Hamiltonian mechanics', 173961: 'Center of mass', 20903754: 'Robotics'}
    # Article ids whose scores we report.
    word_id = [174593, 18947, 37892, 323592, 25098, 30731, 9228, 226829, 70671, 187408, 67088, 17939, 74263, 47641, 226841, 10779, 33306, 1111581, 1410595, 1912367, 312881, 59958, 29688374, 5176, 1979961, 49720, 27709, 14909, 48991805, 28736, 41026, 286788, 28748, 308815, 339024, 169552, 1949268, 74327, 230488, 455769, 291928, 45489756, 946273, 19555, 20580, 19593829, 276582, 19559, 19048, 33692268, 2053742, 25202, 154739, 852089, 1251967, 2217599, 12320384, 20491903, 25219, 19265670, 10890, 33931, 26764, 48781, 150159, 28305, 17553, 275603, 857235, 9505941, 10902, 256662, 22222481, 76954, 606874, 2010784, 531104, 27808, 1072289, 1201321, 4838571, 198319, 37750448, 4795569, 25267, 561845, 711862, 643769, 239290, 30214333, 30400, 5826, 28486339, 24577221, 266950, 31429, 18993869, 3071186, 1911511, 21210, 23912155, 1135324, 25312, 2274, 45151465, 426219, 8429, 19694, 719601, 32498, 1126641, 191734, 12024, 1025272, 36001020, 4946686, 2916607, 19555586, 30876419, 26884, 38293253, 11529, 5390, 295183, 26998547, 32533, 2839, 19737, 415513, 593693, 5918, 56683807, 49210145, 14627, 176931, 22308, 9723174, 82728, 6956, 54386990, 38318895, 265006, 5155121, 47922, 308, 174396, 9532, 3406142, 458558, 6513985, 17730, 13989702, 5961, 20903754, 27979, 1686861, 2434383, 494418, 26962, 474962, 26998617, 579929, 72540, 149861, 18589032, 33130, 157550, 297839, 36269934, 55442288, 2218355, 22393, 288123, 692093, 475008, 244611, 9604, 51079, 204680, 173961, 3691915, 2443, 11051917, 15221133, 61335, 10886039, 610202, 23536538, 60828, 22939, 19194778, 25453985, 2288549, 51624, 473514, 55212, 17327, 9649, 34753458, 172466, 25525, 11132342, 145343, 102338, 644550, 182727, 1968588, 40170957, 17384910, 20431, 211922, 15314901, 327127, 495065, 207833, 991, 1209823, 18404, 33445862, 34083818, 2413037, 17902, 7954422, 14838, 240123, 2434557, 12286, 189951]
    # Membership is tested once per graph node below; a set makes that O(1)
    # instead of the original O(len(word_id)) list scan per node.
    word_ids = set(word_id)
    A = dict()
    rwr = RWR()
    rwr.read_graph('path2.txt', "undirected")
    r = rwr.compute(seed=seeds, max_iters=100)
    # NOTE(review): the original counter was incremented *before* use, so the
    # first entry of r is matched against id 1 (1-based indexing). Behavior
    # kept as-is via enumerate(..., start=1) — confirm node ids are 1-based.
    for index, score in enumerate(r, start=1):
        # score appears to be a length-1 row of the score vector (score[0]
        # extracts the scalar) — TODO confirm RWR.compute's return shape.
        if score > 1e-12 and index in word_ids:
            A[dic[index]] = score[0]
    return A
def run_rwr(edge_list, start_nodes, target_nodes, node_mapping):
    """Rank every node by its accumulated RWR score over all start nodes.

    inputs
        edge_list : str
            path to the edge-list file read by RWR.read_graph
        start_nodes : iterable
            node names to start walks from (names absent from node_mapping
            are silently skipped)
        target_nodes : iterable
            kept for interface compatibility; not used by this function
            (the original computed `target_nums` and never read it)
        node_mapping : dict
            node name -> numeric node id
    outputs
        r_df : pandas.DataFrame
            columns ['node', 'residuals', 'node_name'], sorted by
            'residuals' descending
    """
    # Invert the mapping with .items() (the original iterated .keys() and did
    # a second lookup per key).
    num_to_node_mapping = {num: name for name, num in node_mapping.items()}
    start_nums = [node_mapping[x] for x in start_nodes if x in node_mapping]
    # Accumulate the RWR score vectors from each start node.
    residuals = np.zeros(len(node_mapping))
    rwr = RWR()
    rwr.read_graph(edge_list, 'undirected')
    for node_num in start_nums:
        # NOTE(review): assumes RWR.compute returns a 1-D vector of length
        # len(node_mapping) — an (n, 1) shape would fail the += broadcast.
        residuals += rwr.compute(node_num, c=0.15, max_iters=100)
    r_df = pd.DataFrame({'node': list(range(len(residuals))),
                         'residuals': residuals})
    r_df = r_df.sort_values('residuals', ascending=False)
    # Attach human-readable node names for the ranked ids.
    r_df['node_name'] = [num_to_node_mapping[x] for x in r_df['node']]
    return r_df
class Features():
    """Feature extractor for hashtag-adoption prediction.

    Combines graph-topology features (networkx graph G), RWR scores,
    topology/timeline embeddings, and per-node hashtag history to build
    feature vectors for early adopters and candidate target nodes.
    """
    # Class-level state: executed once at class-definition time and shared by
    # every instance (original note: rwr is a class variable shared by all
    # instances).
    # NOTE(review): this performs file I/O on import of the module — confirm
    # that eager loading is intended.
    start = time.time()
    rwr = RWR()
    rwr.read_graph("../dataset/tab_follower_gcc.edgelist", "undirected")
    print("生成rwr模型花费的时间:", time.time() - start)
    print("读取网络结构嵌入特征......")
    start = time.time()
    topology_embeddings_file = "../embeddings/follower_gcc.anony.embeddings"
    embeddings_dict = read_embeddings_dict(topology_embeddings_file)
    # Load the cascade-timeline embeddings (planned as additional features).
    print("读入cas_timline_embeddings的数据:")
    start = time.time()
    cas_timeline_embeddings = read_embeddings_dict(
        "../embeddings/cas_timeline.embeddings")
    print("读入cas_timline_embeddings的数据所花的时间:", time.time() - start)
    nodeToHashtag, G = get_G_nodeToHashtag()

    def __init__(self, unique_Ak, hashtag):
        # Hard-coded node-count bound used by Structure_Features; presumably
        # |V| of the follower graph — TODO confirm.
        self.N = 595460
        # Ordered collection of early-adopter node ids (first element is the
        # first forwarder).
        self.unique_Ak = unique_Ak
        # The hashtag (topic) whose adoption is being predicted.
        self.hashtag = hashtag

    def the_early_adopter_shortest(self, target_node):
        """Return the early adopter closest to target_node (by shortest-path
        length in G) together with that minimum path length."""
        path_length = [
            nx.shortest_path_length(self.G, source=node, target=target_node)
            for node in self.unique_Ak
        ]
        min_path_length = min(path_length)
        shortest_early_adopter = self.unique_Ak[path_length.index(
            min_path_length)]
        return shortest_early_adopter, min_path_length

    def get_adoption_p(self, target_node):
        """Estimate the probability that target_node adopts self.hashtag.

        Returns 1 if the node already used the hashtag, 0 if either the
        target or the nearest early adopter has no hashtag history, otherwise
        a damped Jaccard similarity of their hashtag sets.
        """
        # belta is a damping factor, empirically set as 0.05
        belta = 0.05
        shortest_early_adopter, shortest_length = self.the_early_adopter_shortest(
            target_node)
        try:
            target_node_hashtags = self.nodeToHashtag[target_node]
        # NOTE(review): bare except — presumably guards a KeyError for nodes
        # with no hashtag history; consider narrowing.
        except:
            target_node_hashtags = set()
            return 0
        if self.hashtag in target_node_hashtags:
            return 1
        try:
            # Hashtag set of the nearest early adopter; if it never joined a
            # topic, treat its set as empty and return 0.
            shortest_early_adopter_hashtags = self.nodeToHashtag[
                shortest_early_adopter]
        except:
            shortest_early_adopter_hashtags = set()
            return 0
        # Damped (belta^distance) Jaccard overlap of the two hashtag sets.
        adoption_p = (belta**shortest_length) * len(
            shortest_early_adopter_hashtags & target_node_hashtags) / len(shortest_early_adopter_hashtags | target_node_hashtags)
        return adoption_p

    def First_Forwarder_Features(self):
        '''
        Features of the first forwarder (first infected node).
        outdeg_v1: the degree of first forwarder in G
        num_hashtags_v1: number of past hashtags used by v1
        orig_connections_k: number of early 2 to k forwarders who are friends
        of the first forwarder
        '''
        # Id of the first infected node.
        First_Forwarder_id = self.unique_Ak[0]
        # Degree of the first infected node in G.
        outdeg_v1 = self.G.degree(First_Forwarder_id)
        # Number of topics the first infected node has participated in.
        num_hashtags_v1 = len(
            self.nodeToHashtag[First_Forwarder_id])
        # How many of the early K adopters are neighbors of the first one.
        orig_connections_k = len(
            set(self.G.neighbors(First_Forwarder_id)) & set(self.unique_Ak))
        first_forwarder_features = [
            outdeg_v1, num_hashtags_v1, orig_connections_k
        ]
        return first_forwarder_features

    def First_K_Forwarders_Features(self):
        '''
        Features of the first K infected nodes.
        Returns [count, max/min/avg degree within their induced subgraph,
        total first-order neighbor count in G, subgraph edge count].
        '''
        # Number of distinct users among the first K (a user may appear
        # multiple times in the cascade).
        unique_Ak_num = len(self.unique_Ak)
        # Degrees of the early adopters in the full graph G.
        Ak_deg_G = [self.G.degree(node) for node in self.unique_Ak]
        # Total number of first-order neighbors of the first K adopters.
        views_1k = sum(Ak_deg_G)
        # Subgraph induced by the first K adopters.
        subG_Ak = self.G.subgraph(self.unique_Ak)
        Ak_deg_subG = [subG_Ak.degree(node) for node in self.unique_Ak]
        # Max, min, and average degree within the induced subgraph.
        max_, min_, ave = [
            max(Ak_deg_subG),
            min(Ak_deg_subG),
            sum(Ak_deg_subG) / len(Ak_deg_subG)
        ]
        # Number of edges in the induced subgraph.
        subG_edges = subG_Ak.size()
        return [unique_Ak_num, max_, min_, ave, views_1k, subG_edges]

    def Structure_Features(self):
        """Max/min/avg degree of the early adopters in G; nodes with id >=
        self.N are treated as having degree 0."""
        Ak_deg_G = []
        for node in self.unique_Ak:
            if node < self.N:
                Ak_deg_G.append(self.G.degree(node))
            else:
                # Node id outside the graph's id range — count degree as 0.
                Ak_deg_G.append(0)
        max_AkG_deg, min_AkG_deg, ave_AkG_deg = [
            max(Ak_deg_G),
            min(Ak_deg_G),
            sum(Ak_deg_G) / len(Ak_deg_G)
        ]
        return [max_AkG_deg, min_AkG_deg, ave_AkG_deg]

    def get_Temporal_Features(self, firstKV_with_time, K):
        '''
        Temporal features of the first K infected nodes.

        firstKV_with_time appears to interleave values so that index 2*i
        holds the adoption timestamp of the i-th adopter — TODO confirm the
        exact layout against the caller.
        '''
        firstKV = firstKV_with_time
        # Number of distinct users among the first K.
        unique_Ak_num = len(self.unique_Ak)
        # Degrees of the early adopters in G.
        Ak_deg_G = [self.G.degree(node) for node in self.unique_Ak]
        # Total number of first-order neighbors of the first K adopters.
        views_1k = sum(Ak_deg_G)
        # Time gap between the i-th adopter and the first adopter.
        time_1_i = [(firstKV[2 * i] - firstKV[0]) for i in range(1, K)]
        # Time gap between consecutive adopters i and i+1.
        time_i_iplus1 = [(firstKV[2 * (i + 1)] - firstKV[2 * i])
                         for i in range(1, K - 1)]
        # Average gap between consecutive adopters among the first K.
        time_ave_k = sum(time_i_iplus1) / len(time_i_iplus1)
        # Average gap within the first K/2 adopters.
        time_ave_1_k2 = sum(time_i_iplus1[:K // 2]) / len(
            time_i_iplus1[:K // 2])
        # Average gap within the last K/2 adopters.
        # NOTE(review): divides by len(...) of a slice that can be empty for
        # small K — potential ZeroDivisionError; confirm K is large enough.
        time_ave_k2_k = sum(time_i_iplus1[K // 2 + 1:]) / len(
            time_i_iplus1[K // 2 + 1:])
        # Gap between the first and the K-th adopter.
        time_k = time_1_i[-1]
        # Speed at which the information is seen (exposure rate).
        speed_exposure = views_1k / time_k
        # Speed at which the information spreads (adoption rate).
        speed_adoption = unique_Ak_num / time_k
        temporal_features = [
            time_ave_k, time_ave_1_k2, time_ave_k2_k, speed_exposure,
            speed_adoption
        ] + time_1_i
        return temporal_features

    def calEuclideanDistance(self, vec1, vec2):
        '''Euclidean distance between two vectors (numpy arrays).'''
        dist = np.sqrt(np.sum(np.square(vec1 - vec2)))
        return dist

    def cosine_similarity(self, vector1, vector2):
        """Cosine similarity of two vectors, scaled by 100 and rounded to 2
        decimals; 0 if either vector has zero norm."""
        dot_product = 0.0
        normA = 0.0
        normB = 0.0
        for a, b in zip(vector1, vector2):
            dot_product += a * b
            normA += a**2
            normB += b**2
        if normA == 0.0 or normB == 0.0:
            return 0
        else:
            return round(dot_product / ((normA**0.5) * (normB**0.5)) * 100, 2)

    def get_reachability_features(self, target_node):
        '''
        Reachability features of target_node w.r.t. the early adopters.

        target_node: int, the node whose adoption is being predicted
        distKV_target: list, Euclidean distances (in topology-embedding
            space) from target_node to each early adopter
        cas_consine_similarity_firstKV_target: list, cosine similarities (in
            cascade-timeline-embedding space) between target_node and each
            early adopter; random small values if the target has no timeline
            embedding, 0 for adopters without one
        '''
        target_topology_embedding = np.array(self.embeddings_dict[target_node])
        # Distance from each early adopter to the target in topology space.
        distKV_target = [self.calEuclideanDistance(np.array(self.embeddings_dict[adoption_node]), \
            target_topology_embedding) for adoption_node in self.unique_Ak]
        if self.cas_timeline_embeddings.__contains__(target_node):
            cas_consine_similarity_firstKV_target = []
            target_timeline_embedding = self.cas_timeline_embeddings[
                target_node]
            for adoption_node in self.unique_Ak:
                if self.cas_timeline_embeddings.__contains__(adoption_node):
                    cas_consine_similarity_firstKV_target.append(
                        self.cosine_similarity(
                            self.cas_timeline_embeddings[adoption_node],
                            target_timeline_embedding))
                else:
                    # Adopter has no timeline embedding: similarity 0.
                    cas_consine_similarity_firstKV_target.append(0)
        else:
            # Target has no timeline embedding: fall back to small random
            # similarities in [0, 0.1).
            cas_consine_similarity_firstKV_target = [
                random.uniform(0, 0.1) for i in range(len(self.unique_Ak))
            ]
        return distKV_target, cas_consine_similarity_firstKV_target

    def Target_Features(self, target_node):
        '''
        Build the full feature vector for target_node: basic neighborhood
        counts, hashtag-overlap statistics, RWR scores to the early adopters,
        embedding distances/similarities, and the adoption probability.
        '''
        # Probability of random walk with restarting from the early adopters
        # reaching the target.
        adoption_p = self.get_adoption_p(target_node)
        r = self.rwr.compute(target_node)
        # RWR score of each early adopter (r rows indexed by node id; [0]
        # extracts the scalar — assumes an (n, 1)-shaped result, confirm).
        r_K = [r[node][0] for node in self.unique_Ak]
        distKV_target, cas_consine_similarity_firstKV_target = self.get_reachability_features(
            target_node)
        target_1_neighbors = list(self.G.neighbors(target_node))
        # Number of neighbors of the target node.
        target_neighbors_num = len(target_1_neighbors)
        # Early adopters among the target's first-order neighbors.
        First_nbr_adopter = len(set(target_1_neighbors) & set(self.unique_Ak))
        target_2_neighbors = []
        for node in target_1_neighbors:
            target_2_neighbors.extend(list(self.G.neighbors(node)))
        # Early adopters among the target's second-order neighbors.
        Second_nbr_adopter = len(set(target_2_neighbors) & set(self.unique_Ak))
        target_3_neighbors = []
        for node in target_2_neighbors:
            target_3_neighbors.extend(list(self.G.neighbors(node)))
        # Early adopters among the target's third-order neighbors.
        Third_nbr_adopter = len(set(target_3_neighbors) & set(self.unique_Ak))
        # Whether the target has participated in any topic before:
        # past_adoption_exist is 1 if so, 0 otherwise.
        try:
            past_adoption_target = self.nodeToHashtag[target_node]
            # Number of topics the target joined (its activity level).
            past_adoption_target_num = len(past_adoption_target)
            past_adoption_exist = 1
            # Overlap between the target's topics and each early adopter's.
            common_hashtags = [
                len(set(self.nodeToHashtag[node]) & set(past_adoption_target))
                for node in self.unique_Ak
            ]
            com_hashtags_max, com_hashtags_min, com_hashtags_ave = \
                [max(common_hashtags), min(common_hashtags), sum(common_hashtags)/len(common_hashtags)]
        # NOTE(review): bare except — presumably a KeyError when the target
        # has never joined a topic; consider narrowing.
        except:
            past_adoption_exist = 0
            past_adoption_target_num = 0
            com_hashtags_max, com_hashtags_min, com_hashtags_ave = [0, 0, 0]
        target_features = [target_node, target_neighbors_num, First_nbr_adopter, Second_nbr_adopter, \
            Third_nbr_adopter, past_adoption_exist, past_adoption_target_num, \
            com_hashtags_max, com_hashtags_min, com_hashtags_ave]
        target_features += r_K
        target_features += distKV_target
        target_features += cas_consine_similarity_firstKV_target + [adoption_p]
        return target_features