예제 #1
0
def louvain_community():
    """ Detect communities using the Louvain algorithm. """

    # Neo4j connection
    neo4j = neo4j_connect()

    with neo4j.session() as session:
        # does the seeded in-memory projection already exist?
        exists_result = session.run(
            "CALL gds.graph.exists('communityGraphSeeded') YIELD exists")
        graph_exists = exists_result.single()["exists"]

        # first run: build the projection and seed it with community IDs
        if not graph_exists:
            # project all nodes and CO_OCCURRENCE relationships into memory
            session.run(
                "CALL gds.graph.create.cypher('communityGraph', 'MATCH (n) RETURN id(n) AS id', 'MATCH (n)-[r:CO_OCCURRENCE]->(m) RETURN id(n) AS source, id(m) AS target')"
            )

            # write initial community IDs onto the database nodes
            session.run(
                "CALL gds.louvain.write('communityGraph', { writeProperty: 'communityId'})"
            )

            # re-project including communityId so future runs can seed from it
            session.run(
                "CALL gds.graph.create.cypher('communityGraphSeeded', 'MATCH (n) RETURN id(n) AS id, n.communityId as communityId', 'MATCH (n)-[r:CO_OCCURRENCE]->(m) RETURN id(n) AS source, id(m) AS target')"
            )

            # the unseeded projection is no longer needed
            session.run("CALL gds.graph.drop('communityGraph')")

        # (re)compute community IDs, seeded by the previous assignment
        session.run(
            "CALL gds.louvain.write('communityGraphSeeded', { writeProperty: 'communityId', seedProperty: 'communityId'})"
        )
예제 #2
0
def betweenness_centrality():
    """ Calculate betweenness centrality for every node in network. """

    # Neo4j connection
    neo4j = neo4j_connect()

    with neo4j.session() as session:
        # project every node and every relationship into an in-memory graph
        session.run(
            "CALL gds.graph.create.cypher('betweennessGraph', 'MATCH (n) RETURN id(n) AS id', 'MATCH (n)-[]->(m) RETURN id(n) AS source, id(m) AS target')"
        )

        # write betweenness centrality onto each node; the yielded stats were
        # never read, so the result is deliberately not bound to a variable
        session.run(
            "CALL gds.betweenness.write('betweennessGraph', { writeProperty: 'betweennessCentrality'}) YIELD minimumScore, maximumScore, createMillis, computeMillis, writeMillis"
        )

        # free the in-memory projection
        session.run("CALL gds.graph.drop('betweennessGraph')")
예제 #3
0
    world_map,
    named_entity_recognition,
    calculate_centrality,
) = parser()

if __name__ == "__main__":
    # logging
    logging.basicConfig(
        filename=os.path.join(config("LOG_DIR"), "analytics.log"),
        format=config("LOG_FORMAT"),
        level=config("LOG_LEVEL"),
        datefmt=config("LOG_DATEFMT"),
    )

    # Neo4j connection
    neo4j = neo4j_connect()

    ###############################################################################################
    # Statistics

    if statistics:
        with neo4j.session() as session:
            statistics = []

            # get statistics
            result = session.run(
                "MATCH (n) RETURN COUNT(DISTINCT n) AS nodes_count")
            statistics.append(
                ["Nodes", next(result.__iter__())["nodes_count"]])

            result = session.run(
예제 #4
0
def evaluate_information_gain():
    """ Evaluate gain in information.

    Measures the information overlap between VK and Twitter user accounts
    that are linked through a shared username: mean connected-atom counts
    per node type for each platform, plus the Jaccard similarity (sigma)
    of the two atom sets.  Results are rendered as a table plot.
    """

    # node labels compared between the two platforms
    node_types = [
        "Username", "Location", "Person", "Organization", "Phone", "Domain"
    ]

    # Neo4j connection
    neo4j = neo4j_connect()

    # statistics accumulated over all connecting usernames
    connected_atoms_vk_total = {t: [] for t in node_types}
    connected_atoms_twitter_total = {t: [] for t in node_types}
    sigma = {t: [] for t in node_types}
    sigma["total"] = []

    # NOTE: the whole loop runs inside the session context — the original
    # called session.run() after the `with` block had closed the session,
    # which fails at runtime.
    with neo4j.session() as session:

        # get usernames connecting accounts on both platforms
        result = session.run(
            "MATCH (k:UserAccount)-[:INCLUSION]-(l:Username)-[:INCLUSION]-(m:UserAccount) WHERE k.platform='vk' AND m.platform='twitter' RETURN l.username as username"
        )
        connecting_usernames = [record["username"] for record in result]

        for u in tqdm(connecting_usernames,
                      desc="usernames",
                      total=len(connecting_usernames),
                      unit="records"):
            # get vk data — parameterized on the current username (the
            # original hard-coded a redacted literal and never used `u`);
            # parameters also avoid Cypher injection
            connecting_atoms_vk = session.run(
                "MATCH (k)-[:INCLUSION]-(l:UserAccount)-[:INCLUSION]-(m:Username) WHERE l.platform='vk' AND m.username=$username RETURN k.nodeId as nodeId, LABELS(k)[0] AS label",
                username=u,
            )
            data_vk = {t: [] for t in node_types}
            for record in connecting_atoms_vk:
                data_vk[record["label"]].append(record["nodeId"])

            # get twitter data
            connecting_atoms_twitter = session.run(
                "MATCH (k)-[:INCLUSION]-(l:UserAccount)-[:INCLUSION]-(m:Username) WHERE l.platform='twitter' AND m.username=$username RETURN k.nodeId as nodeId, LABELS(k)[0] AS label",
                username=u,
            )
            data_twitter = {t: [] for t in node_types}
            for record in connecting_atoms_twitter:
                data_twitter[record["label"]].append(record["nodeId"])

            # store per-type atom counts for this username
            for node_type in node_types:
                connected_atoms_vk_total[node_type].append(
                    len(data_vk[node_type]))
                connected_atoms_twitter_total[node_type].append(
                    len(data_twitter[node_type]))

            vk_atom_set_total = set()
            twitter_atom_set_total = set()

            for node_type in node_types:
                # intersection/union of atom sets for this node type
                vk_set = set(data_vk[node_type])
                twitter_set = set(data_twitter[node_type])
                intersection = vk_set & twitter_set
                union = vk_set | twitter_set

                # accumulate over all node types
                vk_atom_set_total |= vk_set
                twitter_atom_set_total |= twitter_set

                # node type specific sigma (Jaccard similarity)
                if union:
                    sigma[node_type].append(len(intersection) / len(union))

            # overall sigma across all node types; guard against an empty
            # union (original raised ZeroDivisionError in that case)
            union_total = vk_atom_set_total | twitter_atom_set_total
            if union_total:
                intersection_total = (vk_atom_set_total
                                      & twitter_atom_set_total)
                sigma["total"].append(
                    len(intersection_total) / len(union_total))

    # build table rows: mean atom counts and mean sigma per node type;
    # empty lists yield 0 instead of NaN (np.mean([]) warns and returns NaN)
    results = []
    for node_type in node_types:
        vk_counts = connected_atoms_vk_total[node_type]
        twitter_counts = connected_atoms_twitter_total[node_type]
        vk_mean = np.mean(vk_counts) if vk_counts else 0
        twitter_mean = np.mean(twitter_counts) if twitter_counts else 0
        sigma_mean = np.mean(sigma[node_type]) if sigma[node_type] else 0
        results.append([
            node_type,
            np.round(vk_mean, 2),
            np.round(twitter_mean, 2),
            np.round(sigma_mean, 2),
        ])

    sigma_total_mean = np.mean(sigma["total"]) if sigma["total"] else 0
    results.append(
        ["sigma total", np.round(sigma_total_mean, 2), "", ""])

    table_plotter(
        data=results,
        filename="information_overlap_vk_twitter_user_accounts.png",
        title="Information overlap of VK and Twitter user accounts",
        param_plot={"figsize": (9, 12)},
        param_ax={
            "colLabels": ["", "$\\bf{VK}$", "$\\bf{Twitter}$", "$\\sigma_e$"],
            "cellLoc": "left",
            "colLoc": "left",
            "loc": "upper center",
            "edges": "horizontal",
            "colWidths": [0.25, 0.25, 0.25, 0.25],
        },
    )