def process_query(query_type, graph_type, input_path, output_path, seeds=None, c=0.15,
        epsilon=1e-9, max_iters=100, handles_deadend=True):
    '''
    Process a query to obtain a score vector w.r.t. the seeds

    inputs
        query_type : str
            type of querying {'rwr', 'ppr', 'pagerank'}
        graph_type : str
            type of graph {'directed', 'undirected', 'bipartite'}
        input_path : str
            path for the graph data
        output_path : str
            path for storing an RWR score vector
        seeds : int, list, str, or None
            seeds for query
                - 'rwr' : just a nonnegative integer
                - 'ppr' : list of nonnegative integers or file path
                - 'pagerank' : ignored, seeds = range(0, n)
        c : float
            restart probability
        epsilon : float
            error tolerance for power iteration
        max_iters : int
            maximum number of iterations for power iteration
        handles_deadend : bool
            if true, it will handle the deadend issue in power iteration
            otherwise, it won't, i.e., no guarantee for sum of RWR scores
            to be 1 in directed graphs
    outputs
        r : ndarray
            RWR score vector
    raises
        TypeError : if query_type is 'rwr' and seeds is not a single int
        ValueError : if query_type is not one of the supported query types
    '''
    # Avoid the mutable-default-argument pitfall: the previous default was
    # the shared list object []; None is the safe sentinel.
    if seeds is None:
        seeds = []

    if query_type == 'rwr':
        if type(seeds) is not int:
            raise TypeError('Seeds should be a single integer for RWR')
        rwr = RWR()
        rwr.read_graph(input_path, graph_type)
        # NOTE(review): handles_deadend is accepted but never forwarded to
        # compute() in any branch — confirm whether compute() supports it.
        r = rwr.compute(int(seeds), c, epsilon, max_iters)
    elif query_type == 'ppr':
        seeds = get_seeds(seeds)
        ppr = PPR()
        ppr.read_graph(input_path, graph_type)
        r = ppr.compute(seeds, c, epsilon, max_iters)
    elif query_type == 'pagerank':
        pagerank = PageRank()
        pagerank.read_graph(input_path, graph_type)
        r = pagerank.compute(c, epsilon, max_iters)
    else:
        # Previously an unknown query_type fell through and crashed with an
        # UnboundLocalError on r; fail fast with a clear message instead.
        raise ValueError("query_type should be one of {'rwr', 'ppr', 'pagerank'}")

    write_vector(output_path, r)
# Esempio n. 2
# 0
def rwr2(seeds):
    """Run RWR from the given seed(s) on 'path2.txt' and map significant
    scores back to Wikipedia article titles.

    inputs
        seeds : seed node id(s), forwarded to RWR.compute as `seed`
    outputs
        A : dict mapping article title -> RWR score, restricted to node ids
            listed in word_id whose score exceeds 1e-12
    """
    # Node id -> Wikipedia article title (physics-related articles).
    dic = {2010784: 'Ivor Grattan-Guinness', 286788: 'Galilean invariance', 59958: 'Power series', 6513985: 'Mass in general relativity', 266950: "Galileo's ship", 275603: 'Frank Wilczek', 28305: 'String theory', 991: 'Absolute value', 295183: 'Galilean transformation', 33445862: 'Center of momentum frame', 61335: 'Differentiation', 644550: 'Higgs mechanism', 1410595: 'Field equation', 308: 'Aristotle', 18947: 'Metre', 14627: 'Isaac Newton', 26884: 'Superconductivity', 38318895: 'Bra-ket notation', 8429: 'Density', 857235: 'Equivalence principle', 244611: "Newton's law of universal gravitation", 25098: 'Phase velocity', 711862: 'Reissner–Nordström metric', 339024: 'Length contraction', 17553: "Kepler's laws of planetary motion", 288123: 'Worcester Polytechnic Institute', 531104: 'Gravity Probe B', 82728: 'Quantum superposition', 494418: 'Proper time', 12024: 'General relativity', 56683807: 'Shapiro delay', 10902: 'Force', 169552: 'Fifth force', 5961: 'Cognitive psychology', 30876419: 'Quantum state', 2053742: 'Contact force', 25525: 'René Descartes', 34753458: "Gauss' law for gravity", 610202: 'Fine structure', 60828: 'Lepton', 256662: 'Terminal velocity', 11051917: 'Günter Nimtz', 172466: 'Jean Buridan', 51079: 'Magnet', 9604: 'Many-worlds interpretation', 3406142: 'Relativity of simultaneity', 327127: 'John Archibald Wheeler', 76954: 'Parallel', 30400: 'Torque', 26764: 'International System of Units', 19555586: 'Classical mechanics', 74263: 'Frame of reference', 27808: 'Samuel Pepys', 9649: 'Energy', 149861: 'Work (physics)', 23536538: 'University of Wisconsin–Madison', 28736: 'Speed of light', 54386990: 'Absolute time and space', 5826: 'Complex number', 9723174: 'Irwin I. Shapiro', 7954422: 'Cornell University', 48781: 'Philosophiæ Naturalis Principia Mathematica', 495065: 'Louis Essen', 27709: 'Semiconductor', 25219: 'Quanta', 25202: 'Quantum mechanics', 22393: 'Organelle', 37892: 'Thrust', 5155121: 'Richard S. 
Westfall', 17902: 'Leonhard Euler', 2274: 'Arthur Eddington', 29688374: 'Galileo Galilei', 36001020: 'Gravitational wave detector', 1686861: 'Aether drag hypothesis', 12286: 'Great Plague of London', 5176: 'Calculus', 189951: 'Force carrier', 24577221: 'De Historia piscium', 191734: 'Godfrey Kneller', 55212: "Newton's laws of motion", 19593829: 'Spin (physics)', 174593: 'Socinianism', 1979961: "Torricelli's equation", 276582: 'Ricci curvature', 176931: 'Internet Archive', 6956: 'Conservation law', 174396: 'Bohr radius', 10779: 'Frequency', 561845: 'Jacob Bekenstein', 51624: 'Thomas Young (scientist)', 1135324: 'Conjugate variables', 606874: 'Einstein–Cartan theory', 719601: 'MIT Press', 187408: 'Self-adjoint operator', 154739: 'Electrodeposition', 312881: 'Action (physics)', 33931: 'Weight', 458558: 'List of scientific laws named after people', 426219: 'Classical electromagnetism', 22939: 'Physics', 26998617: 'Field (physics)', 25453985: 'Atomic clock', 2434383: 'Local reference frame', 4795569: 'Hypotheses non fingo', 28748: 'Speed', 37750448: 'Taub-NUT vacuum', 643769: 'Quantum tunnelling', 34083818: 'Entailment', 145343: 'Wave function', 9505941: 'List of quantum-mechanical systems with analytical solutions', 19694: 'Mercury (planet)', 21210: 'Niels Bohr', 239290: 'John Wallis', 10890: 'Fundamental interaction', 72540: 'Newton (unit)', 3691915: '0 (number)', 32533: 'Euclidean vector', 45489756: 'Gravitation', 1251967: 'Princeton University Press', 49210145: 'Coriolis effect', 47641: 'Standard Model', 579929: 'Rest (physics)', 15314901: 'Proper velocity', 33692268: 'List of topics named after Leonhard Euler', 26998547: 'Degrees of freedom (physics and chemistry)', 22222481: "Euler's laws of motion", 2217599: 'Circular symmetry', 1912367: 'Electromagnetic tensor', 204680: 'Four-momentum', 74327: 'Principle of relativity', 1072289: 'Uniform circular motion', 1949268: 'Aether (classical element)', 230488: 'Minkowski space', 30731: 'Teleological argument', 852089: 
'Gravitational time dilation', 297839: 'Time dilation', 2288549: 'Momentum operator', 3071186: 'Gravitational acceleration', 26962: 'Special relativity', 22308: 'Oxford', 150159: "Noether's theorem", 19265670: 'Centrifugal force', 2218355: 'Astronomia nova', 2434557: 'Non-inertial reference frame', 14838: 'Inertial frame of reference', 475008: 'Stiffness', 1968588: 'Hypervelocity', 49720: 'Robert Hooke', 182727: "Mach's principle", 2916607: 'Force field (physics)', 265006: 'Dialogue Concerning the Two Chief World Systems', 473514: 'Generalized coordinates', 31429: 'Twin paradox', 9228: 'Earth', 226841: 'Four-acceleration', 2443: 'Acceleration', 12320384: 'Theory of impetus', 474962: 'Translation (physics)', 67088: 'Conservation of energy', 1111581: 'Reaction (physics)', 10886039: 'SunPower', 47922: 'Determinism', 25267: 'Quantum field theory', 20431: 'Momentum', 2413037: 'Isolated system', 240123: 'Plasticity (physics)', 308815: 'Connection (mathematics)', 19555: 'Molecule', 19048: 'Mass', 18404: 'Lorentz transformation', 2839: 'Angular momentum', 11132342: 'Bodmer Library', 33306: 'Water', 455769: 'Rigid body', 5390: 'Conversion of units', 157550: 'Karl Schwarzschild', 25312: 'Quantum gravity', 20491903: 'Velocity', 19737: "Maxwell's equations", 15221133: 'Euler force', 593693: 'Point (geometry)', 211922: 'Impulse (physics)', 18993869: 'Gas', 1209823: 'Rotating reference frame', 17939: 'Light', 692093: 'Oxford University Museum of Natural History', 30214333: '1st Earl of Halifax', 19194778: 'Deformation (mechanics)', 41026: 'Dielectric', 415513: 'Net force', 18589032: 'Particle accelerator', 36269934: 'Geodesic (general relativity)', 28486339: 'Georgia Institute of Technology', 13989702: 'Wolfram Demonstrations Project', 102338: 'Henry Cavendish', 1911511: 'Lorentz scalar', 1025272: 'Bohr–Einstein debates', 946273: 'Harry Ransom Center', 9532: 'Electromagnetism', 5918: 'Continuum mechanics', 20580: 'Motion (physics)', 14909: 'Inertia', 17384910: 'Observer (special 
relativity)', 70671: 'Stress–energy tensor', 17730: 'Latin', 48991805: 'Contributors to general relativity', 323592: 'Nicolaus Copernicus', 226829: 'Four-velocity', 33130: 'Werner Heisenberg', 38293253: 'Geodesic deviation equation', 19559: 'Mechanics', 4946686: 'Relative velocity', 27979: 'Sunlight', 11529: 'Fermion', 32498: 'Volume', 40170957: 'Action at a distance (physics)', 1201321: 'Superposition principle', 1126641: 'Invariant (physics)', 55442288: 'Constant factor rule in differentiation', 17327: 'Kinetic energy', 23912155: 'Gauge theory', 4838571: 'Position operator', 45151465: 'Natural phenomenon', 207833: 'Radial velocity', 291928: 'Operator (physics)', 198319: 'Hamiltonian mechanics', 173961: 'Center of mass', 20903754: 'Robotics'}
    # Node ids whose scores should be reported.
    word_id = [174593, 18947, 37892, 323592, 25098, 30731, 9228, 226829, 70671, 187408, 67088, 17939, 74263, 47641, 226841, 10779, 33306, 1111581, 1410595, 1912367, 312881, 59958, 29688374, 5176, 1979961, 49720, 27709, 14909, 48991805, 28736, 41026, 286788, 28748, 308815, 339024, 169552, 1949268, 74327, 230488, 455769, 291928, 45489756, 946273, 19555, 20580, 19593829, 276582, 19559, 19048, 33692268, 2053742, 25202, 154739, 852089, 1251967, 2217599, 12320384, 20491903, 25219, 19265670, 10890, 33931, 26764, 48781, 150159, 28305, 17553, 275603, 857235, 9505941, 10902, 256662, 22222481, 76954, 606874, 2010784, 531104, 27808, 1072289, 1201321, 4838571, 198319, 37750448, 4795569, 25267, 561845, 711862, 643769, 239290, 30214333, 30400, 5826, 28486339, 24577221, 266950, 31429, 18993869, 3071186, 1911511, 21210, 23912155, 1135324, 25312, 2274, 45151465, 426219, 8429, 19694, 719601, 32498, 1126641, 191734, 12024, 1025272, 36001020, 4946686, 2916607, 19555586, 30876419, 26884, 38293253, 11529, 5390, 295183, 26998547, 32533, 2839, 19737, 415513, 593693, 5918, 56683807, 49210145, 14627, 176931, 22308, 9723174, 82728, 6956, 54386990, 38318895, 265006, 5155121, 47922, 308, 174396, 9532, 3406142, 458558, 6513985, 17730, 13989702, 5961, 20903754, 27979, 1686861, 2434383, 494418, 26962, 474962, 26998617, 579929, 72540, 149861, 18589032, 33130, 157550, 297839, 36269934, 55442288, 2218355, 22393, 288123, 692093, 475008, 244611, 9604, 51079, 204680, 173961, 3691915, 2443, 11051917, 15221133, 61335, 10886039, 610202, 23536538, 60828, 22939, 19194778, 25453985, 2288549, 51624, 473514, 55212, 17327, 9649, 34753458, 172466, 25525, 11132342, 145343, 102338, 644550, 182727, 1968588, 40170957, 17384910, 20431, 211922, 15314901, 327127, 495065, 207833, 991, 1209823, 18404, 33445862, 34083818, 2413037, 17902, 7954422, 14838, 240123, 2434557, 12286, 189951]
    # Result: article title -> score.
    A = dict()
    # Random walk with restart over the undirected graph in 'path2.txt'.
    rwr = RWR()
    rwr.read_graph('path2.txt', "undirected")
    r = rwr.compute(seed=seeds, max_iters=100)
    # NOTE(review): `index` is incremented before the membership test, so
    # scores are matched against 1-based positions while word_id/dic keys
    # look like raw node ids — confirm the intended indexing (possible
    # off-by-one).  `i[0]` implies r is a column vector of shape (n, 1).
    index = 0
    for i in r:
        index = index + 1
        if i > 1e-12:
            if (index + 0) in word_id:
                A[dic[index + 0]] = i[0]
    return A
# Esempio n. 3
# 0
def run_rwr(edge_list, start_nodes, target_nodes, node_mapping):
    """Accumulate RWR scores from every mapped start node and rank nodes.

    inputs
        edge_list : path of the graph data passed to RWR.read_graph
        start_nodes : node names whose RWR score vectors are summed
        target_nodes : node names of interest (mapped below but unused)
        node_mapping : dict mapping node name -> numeric node id
    outputs
        r_df : DataFrame with columns 'node', 'residuals', 'node_name',
               sorted by 'residuals' in descending order
    """
    # Inverse mapping: numeric id -> node name.
    num_to_node_mapping = {num: name for name, num in node_mapping.items()}
    start_nums = [node_mapping[name] for name in start_nodes if name in node_mapping]
    # NOTE(review): target_nums is computed but never used below — verify
    # whether filtering to target nodes was intended here.
    target_nums = [node_mapping[name] for name in target_nodes if name in node_mapping]

    # Sum of the per-seed RWR score vectors over all start nodes.
    residuals = np.zeros(len(node_mapping))
    rwr = RWR()
    rwr.read_graph(edge_list, 'undirected')
    for seed in start_nums:
        residuals += rwr.compute(seed, c=.15, max_iters=100)

    r_df = pd.DataFrame({'node': list(range(len(residuals))), 'residuals': residuals})
    r_df = r_df.sort_values('residuals', ascending=False)
    # Attach human-readable names for the ranking.
    r_df['node_name'] = [num_to_node_mapping[num] for num in r_df['node']]
    return r_df
# Esempio n. 4
# 0
class Features():
    """Feature extractor for hashtag-cascade prediction.

    NOTE(review): the class body below performs heavy I/O (graph load,
    embedding reads) at class-definition time, so the resulting objects are
    class variables shared by every instance — confirm this is intentional.
    """
    # rwr is a class variable; all instances of the class share it
    start = time.time()
    rwr = RWR()
    rwr.read_graph("../dataset/tab_follower_gcc.edgelist", "undirected")
    print("生成rwr模型花费的时间:", time.time() - start)
    print("读取网络结构嵌入特征......")
    start = time.time()
    # Topology (structural) embeddings of the follower graph.
    topology_embeddings_file = "../embeddings/follower_gcc.anony.embeddings"
    embeddings_dict = read_embeddings_dict(topology_embeddings_file)
    # Load the cas_timeline embeddings, planned to be added to the feature data.
    print("读入cas_timline_embeddings的数据:")
    start = time.time()
    cas_timeline_embeddings = read_embeddings_dict(
        "../embeddings/cas_timeline.embeddings")
    print("读入cas_timline_embeddings的数据所花的时间:", time.time() - start)
    # nodeToHashtag: node -> set of hashtags used; G: the follower graph.
    nodeToHashtag, G = get_G_nodeToHashtag()

    def __init__(self, unique_Ak, hashtag):
        """Store the early-adopter node list and the hashtag under study."""
        self.unique_Ak = unique_Ak
        self.hashtag = hashtag
        # Hard-coded total node count of the follower graph.
        self.N = 595460

    def the_early_adopter_shortest(self, target_node):
        """Return the early adopter closest to target_node and its distance.

        Scans self.unique_Ak and returns the member with the smallest
        shortest-path length to target_node in self.G (first one on ties),
        together with that length.
        """
        distances = [
            nx.shortest_path_length(self.G, source=adopter, target=target_node)
            for adopter in self.unique_Ak
        ]
        min_path_length = min(distances)
        closest_adopter = self.unique_Ak[distances.index(min_path_length)]
        return closest_adopter, min_path_length

    def get_adoption_p(self, target_node):
        """Estimate the probability that target_node adopts self.hashtag.

        Returns 1 if the target already used the hashtag; otherwise a
        distance-damped Jaccard similarity between the hashtag sets of the
        target and of its closest early adopter; 0 when either node has no
        hashtag history.
        """
        # beta is a damping factor, empirically set as 0.05
        # (fixed local-name typo: was 'belta')
        beta = 0.05
        shortest_early_adopter, shortest_length = self.the_early_adopter_shortest(
            target_node)
        try:
            target_node_hashtags = self.nodeToHashtag[target_node]
        except KeyError:
            # Target never participated in any hashtag — was a bare except
            # that also swallowed unrelated errors; narrowed to KeyError.
            return 0
        if self.hashtag in target_node_hashtags:
            return 1

        try:
            # Hashtags the closest early adopter has participated in;
            # missing entry means it has no hashtag history.
            shortest_early_adopter_hashtags = self.nodeToHashtag[
                shortest_early_adopter]
        except KeyError:
            return 0

        # Damped Jaccard similarity of the two hashtag sets.
        adoption_p = (beta**shortest_length) * len(
            shortest_early_adopter_hashtags
            & target_node_hashtags) / len(shortest_early_adopter_hashtags
                                          | target_node_hashtags)
        return adoption_p

    def First_Forwarder_Features(self):
        """Features of the first forwarder.

        outdeg_v1: the degree of the first forwarder in G
        num_hashtags_v1: number of past hashtags used by v1
        orig_connections_k: number of early 2..k forwarders who are friends
        of the first forwarder
        """
        v1 = self.unique_Ak[0]  # id of the first infected node
        outdeg_v1 = self.G.degree(v1)
        num_hashtags_v1 = len(self.nodeToHashtag[v1])
        # Early adopters that are direct neighbours of the first forwarder.
        orig_connections_k = len(set(self.G.neighbors(v1)) & set(self.unique_Ak))
        return [outdeg_v1, num_hashtags_v1, orig_connections_k]

    def First_K_Forwarders_Features(self):
        """Features of the first K infected nodes (early adopters)."""
        # Number of distinct early adopters (a user may appear repeatedly).
        unique_Ak_num = len(self.unique_Ak)
        # Degrees of the early adopters in the full graph G.
        degrees_in_G = [self.G.degree(v) for v in self.unique_Ak]
        # Total number of first-order neighbours of the early adopters.
        views_1k = sum(degrees_in_G)
        # Induced subgraph on the early adopters.
        sub = self.G.subgraph(self.unique_Ak)
        degrees_in_sub = [sub.degree(v) for v in self.unique_Ak]
        # Max / min / mean degree inside the subgraph.
        max_deg = max(degrees_in_sub)
        min_deg = min(degrees_in_sub)
        avg_deg = sum(degrees_in_sub) / len(degrees_in_sub)
        edge_count = sub.size()  # number of edges inside the subgraph
        return [unique_Ak_num, max_deg, min_deg, avg_deg, views_1k, edge_count]

    def Structure_Features(self):
        """Max / min / mean degree of the early adopters in G.

        Nodes with an id >= self.N are treated as absent from G and
        contribute a degree of 0.
        """
        degrees = [
            self.G.degree(node) if node < self.N else 0
            for node in self.unique_Ak
        ]
        return [max(degrees), min(degrees), sum(degrees) / len(degrees)]

    def get_Temporal_Features(self, firstKV_with_time, K):
        """Temporal features of the first K infected nodes.

        firstKV_with_time is a flat list where entry 2*i is the adoption
        timestamp of the i-th forwarder; K is the number of early
        forwarders considered.  Returns averages of adoption-time gaps,
        exposure/adoption speeds, and the per-node gaps to the first
        adoption.
        """
        times = firstKV_with_time
        # Number of distinct early adopters (a user may appear repeatedly).
        n_adopters = len(self.unique_Ak)
        # Total first-order neighbours of the early adopters in G.
        views_1k = sum(self.G.degree(node) for node in self.unique_Ak)

        # Gap between the i-th and the first adoption time, i = 1..K-1.
        time_1_i = [times[2 * i] - times[0] for i in range(1, K)]
        # Gap between consecutive adoptions, i = 1..K-2.
        gaps = [times[2 * (i + 1)] - times[2 * i] for i in range(1, K - 1)]

        # Average gap over all consecutive pairs.
        time_ave_k = sum(gaps) / len(gaps)
        # Average gap over the first / second half of the sequence.
        first_half = gaps[:K // 2]
        second_half = gaps[K // 2 + 1:]
        time_ave_1_k2 = sum(first_half) / len(first_half)
        time_ave_k2_k = sum(second_half) / len(second_half)

        # Span from the first to the K-th adoption.
        time_k = time_1_i[-1]
        speed_exposure = views_1k / time_k   # how fast the cascade is seen
        speed_adoption = n_adopters / time_k  # how fast it is adopted

        return [time_ave_k, time_ave_1_k2, time_ave_k2_k, speed_exposure,
                speed_adoption] + time_1_i

    def calEuclideanDistance(self, vec1, vec2):
        """Euclidean distance between two numeric vectors (numpy arrays)."""
        diff = vec1 - vec2
        return np.sqrt(np.sum(np.square(diff)))

    def cosine_similarity(self, vector1, vector2):
        """Cosine similarity of two vectors, scaled to [0, 100] and rounded
        to two decimals; 0 when either vector has zero norm."""
        pairs = list(zip(vector1, vector2))
        dot_product = sum(a * b for a, b in pairs)
        normA = sum(a ** 2 for a, _ in pairs)
        normB = sum(b ** 2 for _, b in pairs)
        if normA == 0.0 or normB == 0.0:
            return 0
        return round(dot_product / ((normA ** 0.5) * (normB ** 0.5)) * 100, 2)

    def get_reachability_features(self, target_node):
        """Distances / similarities between target_node and the early adopters.

        Returns a pair of lists, one entry per early adopter in
        self.unique_Ak:
            distKV_target: Euclidean distances in topology-embedding space
                from each early adopter to the target.
            cas_consine_similarity_firstKV_target: cosine similarities in
                the cascade-timeline embedding space; 0 for adopters with
                no timeline embedding, and small uniform noise in [0, 0.1)
                when the target itself has none.
        """
        target_topology_embedding = np.array(self.embeddings_dict[target_node])
        distKV_target = [
            self.calEuclideanDistance(
                np.array(self.embeddings_dict[adopter]),
                target_topology_embedding)
            for adopter in self.unique_Ak
        ]

        if target_node in self.cas_timeline_embeddings:
            target_timeline_embedding = self.cas_timeline_embeddings[
                target_node]
            cas_consine_similarity_firstKV_target = []
            for adopter in self.unique_Ak:
                if adopter in self.cas_timeline_embeddings:
                    sim = self.cosine_similarity(
                        self.cas_timeline_embeddings[adopter],
                        target_timeline_embedding)
                else:
                    sim = 0
                cas_consine_similarity_firstKV_target.append(sim)
        else:
            # Target has no timeline embedding: fall back to small noise.
            cas_consine_similarity_firstKV_target = [
                random.uniform(0, 0.1) for _ in range(len(self.unique_Ak))
            ]

        return distKV_target, cas_consine_similarity_firstKV_target

    def Target_Features(self, target_node):
        """Build the feature vector for a candidate target node.

        The vector contains, in order: target id, 1st-order neighbour
        count, overlap of the 1st/2nd/3rd-order neighbourhoods with the
        early adopters, hashtag-history features, the RWR scores of the
        early adopters w.r.t. the target, topology-embedding distances,
        timeline-embedding similarities, and the adoption probability.
        """
        adoption_p = self.get_adoption_p(target_node)
        # RWR scores of all nodes w.r.t. the target; keep the early adopters'.
        r = self.rwr.compute(target_node)
        r_K = [r[node][0] for node in self.unique_Ak]
        distKV_target, cas_consine_similarity_firstKV_target = self.get_reachability_features(
            target_node)

        # Hoisted: the original rebuilt this set for every neighbourhood order.
        early_adopters = set(self.unique_Ak)

        target_1_neighbors = list(self.G.neighbors(target_node))
        # Number of direct neighbours of the target.
        target_neighbors_num = len(target_1_neighbors)
        # Overlap of the 1st-order neighbourhood with the early adopters.
        First_nbr_adopter = len(set(target_1_neighbors) & early_adopters)

        target_2_neighbors = []
        for node in target_1_neighbors:
            target_2_neighbors.extend(list(self.G.neighbors(node)))
        # Overlap of the 2nd-order neighbourhood with the early adopters.
        Second_nbr_adopter = len(set(target_2_neighbors) & early_adopters)

        target_3_neighbors = []
        for node in target_2_neighbors:
            target_3_neighbors.extend(list(self.G.neighbors(node)))
        # Overlap of the 3rd-order neighbourhood with the early adopters.
        Third_nbr_adopter = len(set(target_3_neighbors) & early_adopters)

        try:
            past_adoption_target = self.nodeToHashtag[target_node]
            # Number of hashtags the target participated in (its activity).
            past_adoption_target_num = len(past_adoption_target)
            # Flag: the target has participated in at least one hashtag.
            past_adoption_exist = 1
            # Hashtag overlap between the target and each early adopter.
            common_hashtags = [
                len(set(self.nodeToHashtag[node]) & set(past_adoption_target))
                for node in self.unique_Ak
            ]
            com_hashtags_max, com_hashtags_min, com_hashtags_ave = \
            [max(common_hashtags), min(common_hashtags), sum(common_hashtags)/len(common_hashtags)]
        except KeyError:
            # Target (or an early adopter) has no hashtag history — was a
            # bare except that also hid unrelated errors; narrowed to KeyError.
            past_adoption_exist = 0
            past_adoption_target_num = 0
            com_hashtags_max, com_hashtags_min, com_hashtags_ave = [0, 0, 0]

        target_features = [target_node, target_neighbors_num, First_nbr_adopter, Second_nbr_adopter, \
                           Third_nbr_adopter, past_adoption_exist, past_adoption_target_num, \
                           com_hashtags_max, com_hashtags_min, com_hashtags_ave]
        target_features += r_K
        target_features += distKV_target
        target_features += cas_consine_similarity_firstKV_target + [adoption_p]
        return target_features