def interaction_score(driver: BoltDriver, username):
    """
    Compute the interaction score of a user in the graph:

        # comments received / (# comments received + # comments made)

    "Comments received" are the comments under submissions authored by the
    user; "comments made" are the comments authored by the user.

    Best practice is to use it in networks with nodes with limit=None

    Inspired from "Analyzing behavioral trends in community driven
    discussion platforms like Reddit"
    DOI: 10.1109/ASONAM.2018.8508687

    Score close to 1: User is a "starter"
    Score close to 0: User is a "consumer"

    Args:
        driver: Driver used to open the database session.
        username: Value matched against the ``username`` property of
            ``Redditor`` nodes.

    Returns:
        The ratio described above (a float between 0 and 1).

    Raises:
        ZeroDivisionError: If the user has neither received nor made any
            comment (both counts are zero).
    """
    received_query = """
        MATCH (:Redditor {username: $username})-[:AUTHORED]-(:Submission)-[:UNDER]-(c:Comment)
        RETURN count(c)
    """
    made_query = """
        MATCH (:Redditor {username: $username})-[:AUTHORED]-(c:Comment)
        RETURN count(c)
    """
    # The username travels as a query parameter instead of being spliced into
    # the Cypher text, so quotes or other special characters cannot alter or
    # break the query.
    parameters = {"username": username}

    # The context manager closes the session even when a query fails.
    with driver.session() as session:
        # Each query returns a single row holding a single count; the result
        # is materialised and its first column of the first row is read.
        comments_received = list(session.run(received_query, parameters))[0][0]
        comments_made = list(session.run(made_query, parameters))[0][0]

    return comments_received / (comments_received + comments_made)
def get_subreddit_comments_times(driver: BoltDriver, subreddit_name):
    """
    Fetch, for every comment under the submissions of a subreddit, the delay
    between the submission and the comment.

    The delay is computed by the query as
    ``(c.created_utc - s.created_utc) / 1000`` and exposed as
    ``seconds_past``.
    NOTE(review): this presumably assumes ``created_utc`` is stored in
    milliseconds - confirm against the data loader.

    Args:
        driver: Driver used to open the database session.
        subreddit_name: Value matched against the ``name`` property of
            ``Subreddit`` nodes.

    Returns:
        A tuple ``(ids, delays)`` of two lists of equal length: the comment
        ids and the corresponding ``seconds_past`` values. Rows sharing the
        same id are collapsed into one entry (the last one wins).
    """
    query = """
        MATCH (:Subreddit {name: $subreddit_name})-[:UNDER]-(s:Submission)-[:UNDER]-(c:Comment)
        RETURN c.id AS id, (c.created_utc - s.created_utc) / 1000 AS seconds_past
    """
    # The name is sent as a query parameter rather than formatted into the
    # Cypher text, so names containing quotes cannot break or alter the query.
    parameters = {"subreddit_name": subreddit_name}

    # The rows are materialised inside the block because the session is closed
    # on exit.
    with driver.session() as session:
        rows = list(session.run(query, parameters))

    # Every row is an (id, seconds_past) pair, so dict() maps id -> delay.
    delay_by_comment_id = dict(rows)
    return list(delay_by_comment_id.keys()), list(delay_by_comment_id.values())
def query_1(driver: BoltDriver, **params) -> None:
    """
    Print the IDs of the distinct persons that have a relationship pointing
    at a given person and started after a given moment.

    The keyword arguments are forwarded unchanged as Cypher parameters. The
    query reads ``$poi`` (``personID`` of the targeted person) and
    ``$timestamp`` (handed to Cypher's ``datetime()`` and compared with the
    relationship's ``started_at``).
    """
    query = (
        " MATCH (callee:Person {personID: $poi}) <-[r]- (caller:Person)"
        " WHERE r.started_at > datetime($timestamp)"
        " WITH DISTINCT caller AS callers"
        " RETURN collect(callers.personID) AS callers "
    )
    with driver.session() as db:
        # The query text is echoed before it is executed.
        print(f"\nQuery 1:\n {query}")
        result = db.run(query, params)
        print(f"Result:\n{result.data()}")
def query1(driver: BoltDriver) -> None:
    """
    Who are the top 3 most-followed persons in the network?

    Runs the query, then prints the query text followed by the rows
    (``personID`` and ``numFollowers``), ordered by follower count,
    highest first.
    """
    query = (
        " MATCH (follower:Person) -[:FOLLOWS]-> (person:Person)"
        " RETURN person.personID AS personID,"
        " size(collect(follower.personID)) AS numFollowers"
        " ORDER BY numFollowers DESC LIMIT 3 "
    )
    with driver.session() as db:
        # Here the query is executed first and echoed afterwards.
        result = db.run(query)
        print(f"\nQuery 1:\n {query}")
        print(f"Top 3 most-followed persons:\n{result.data()}")
def query_5(driver: BoltDriver, **params) -> None:
    """
    Print the 3 customers of a company with the highest average call
    duration.

    The keyword arguments are forwarded unchanged as Cypher parameters. The
    query reads ``$company`` (company name) and ``$age`` (only customers
    strictly older than this are kept). Only outgoing relationships to
    persons that carry a ``call_duration`` property are averaged.
    """
    # NOTE(review): the `EXISTS (property)` form is deprecated in newer Neo4j
    # releases in favour of `IS NOT NULL` - confirm against the server version.
    query = (
        " MATCH (company:Company {name: $company})"
        " <-[:HAS_CONTRACT]- (customer:Person) -[called]-> (p:Person)"
        " WHERE customer.age > $age AND EXISTS (called.call_duration)"
        " RETURN customer.fullName as name,"
        " avg(called.call_duration) AS avgCallDuration"
        " ORDER BY avgCallDuration DESC LIMIT 3 "
    )
    with driver.session() as db:
        # The query text is echoed before it is executed.
        print(f"\nQuery 5:\n {query}")
        result = db.run(query, params)
        print(f"Result:\n{result.data()}")
def query_3(driver: BoltDriver, **params) -> None:
    """
    Print the IDs of the persons that two given persons both have an
    outgoing relationship to.

    The keyword arguments are forwarded unchanged as Cypher parameters. The
    query reads ``$person1`` and ``$person2`` (``personID`` values) and
    ``$company`` (name of the company both persons must have a contract
    with).
    """
    query = (
        " MATCH (p1:Person {personID: $person1})"
        " -[:HAS_CONTRACT]-> (c:Company {name: $company})"
        " MATCH (p2:Person {personID: $person2}) -[:HAS_CONTRACT]-> (c)"
        " MATCH (p1) --> (contact:Person) <-- (p2)"
        " WITH DISTINCT contact AS c"
        " RETURN collect(c.personID) AS commonContacts "
    )
    with driver.session() as db:
        # The query text is echoed before it is executed.
        print(f"\nQuery 3:\n {query}")
        result = db.run(query, params)
        print(f"Result:\n{result.data()}")
def query3(driver: BoltDriver, **params) -> None:
    """
    Which are the top 5 cities in a particular region of the world with the
    lowest average age in the network?

    The keyword arguments are forwarded unchanged as Cypher parameters. The
    query reads ``$region`` (name of the ``Region`` node), and ``region`` is
    also used in the printed heading.

    Raises:
        KeyError: If ``region`` is not supplied and execution reaches the
            heading.
    """
    query = (
        " MATCH (p:Person) -[:LIVES_IN]-> (c:City)"
        " -[*..2]-> (reg:Region {name: $region})"
        " RETURN c.name AS city, c.country AS country,"
        " avg(p.age) AS averageAge"
        " ORDER BY averageAge LIMIT 5 "
    )
    with driver.session() as session:
        # The query text is echoed before it is executed.
        print(f"\nQuery 3:\n {query}")
        result = session.run(query, params)
        # The rows are grouped per city (with its country as an extra
        # column), so the heading says "cities" rather than "countries".
        print(
            f"5 cities with lowest average age in {params['region']}:\n{result.data()}"
        )
def query2(driver: BoltDriver) -> None:
    """
    In which city does the most-followed person in the network live?

    Runs the query, then prints the query text followed by the rows
    (``person``, ``numFollowers`` and ``city``).
    """
    query = (
        " MATCH (follower:Person) -[:FOLLOWS]-> (person:Person)"
        " WITH person, COLLECT(follower.personID) AS followers"
        " ORDER BY size(followers) DESC LIMIT 1"
        " MATCH (person) -[:LIVES_IN]-> (city:City)"
        " RETURN person.personID AS person,"
        " size(followers) AS numFollowers, city.name AS city "
    )
    with driver.session() as db:
        # Here the query is executed first and echoed afterwards.
        result = db.run(query)
        print(f"\nQuery 2:\n {query}")
        print(f"City in which most-followed person lives:\n{result.data()}")
def query_2(driver: BoltDriver, **params) -> None:
    """
    Print the IDs of the distinct "target" persons reached by suspects
    after one of the suspect's other relationships started.

    A suspect is a person living in ``$city``, older than ``$suspect_age``,
    holding a contract with the company named ``$company`` and having at
    least one outgoing relationship to a person. A target is a person younger
    than ``$target_age`` that the suspect has an outgoing relationship to
    whose ``started_at`` is later than the ``started_at`` of that first
    relationship.

    The keyword arguments are forwarded unchanged as Cypher parameters.
    """
    query = (
        " MATCH (c:Company {name: $company})"
        " <-[:HAS_CONTRACT]- (suspect:Person {city: $city}) -[r1]-> (:Person)"
        " WHERE suspect.age > $suspect_age"
        " WITH suspect, r1.started_at AS patternDate"
        " MATCH (target:Person) <-[r2]- (suspect)"
        " WHERE target.age < $target_age AND r2.started_at > patternDate"
        " WITH DISTINCT target AS t"
        " RETURN collect(t.personID) AS targets "
    )
    with driver.session() as db:
        # The query text is echoed before it is executed.
        print(f"\nQuery 2:\n {query}")
        result = db.run(query, params)
        print(f"Result:\n{result.data()}")
def query_4(driver: BoltDriver, **params) -> None:
    """
    Print the IDs of the persons who are linked both to a given company and
    to a person of interest, and who are also linked to another member of
    that same group.

    The keyword arguments are forwarded unchanged as Cypher parameters. The
    query reads ``$poi`` (``personID`` of the person of interest) and
    ``$company`` (company name). The group is collected once and unwound
    twice to form every (caller, callee) pair, keeping the callers that share
    a relationship in either direction with the paired callee.
    """
    query = (
        " MATCH (poi:Person {personID: $poi})"
        " MATCH (c:Company {name: $company}) <-- (p:Person) --> (poi)"
        " WITH collect(p) AS callers"
        " UNWIND callers AS caller"
        " UNWIND callers AS callee"
        " WITH *"
        " MATCH (caller) -- (callee)"
        " WITH DISTINCT caller AS c"
        " RETURN collect(c.personID) AS callers "
    )
    with driver.session() as db:
        # The query text is echoed before it is executed.
        print(f"\nQuery 4:\n {query}")
        result = db.run(query, params)
        print(f"Result:\n{result.data()}")
def query4(driver: BoltDriver, **params) -> None:
    """
    Which 3 countries in the network have the most people within a specified
    age range?

    The keyword arguments are forwarded unchanged as Cypher parameters. The
    query reads ``$age_lower`` and ``$age_upper`` (both bounds are
    exclusive); the same two values are repeated in the printed heading.
    """
    query = (
        " MATCH (p:Person)"
        " WHERE p.age > $age_lower AND p.age < $age_upper"
        " MATCH (p) -[*..2]-> (country:Country)"
        " RETURN country.name AS countries, count(country) AS personCounts"
        " ORDER BY personCounts DESC LIMIT 3 "
    )
    with driver.session() as db:
        # The query text is echoed before it is executed.
        print(f"\nQuery 4:\n {query}")
        result = db.run(query, params)
        print(
            f" 3 Countries with the most people with age > {params['age_lower']}"
            f" and < {params['age_upper']}:\n{result.data()} "
        )
def get_redditors(driver: BoltDriver) -> list:
    """
    Return the ``username`` property of every ``Redditor`` node.

    Args:
        driver: Driver used to open the database session.

    Returns:
        A list with one entry per ``Redditor`` node, in the order the
        database returns them.
    """
    query = """
        MATCH (r:Redditor)
        RETURN r.username
    """
    # The context manager closes the session once the rows are read; the
    # comprehension consumes the whole result before the block exits.
    with driver.session() as session:
        # Each row has a single column, the username.
        return [row[0] for row in session.run(query)]