Ejemplo n.º 1
0
def connect_users_cities(graph, args):
    """Link every User node to the City nodes listed in its ``city`` property.

    One ``IS_FROM`` relationship is built per (user, city) pair; the whole
    list is then handed to ``graph_util.create_in_batch``.

    Args:
        graph: Graph handle; must expose ``nodes.match`` and is also passed
            on to the ``graph_util`` helpers.
        args: Not used by this function.
    """
    # Lookup keyed by the "name" property of City nodes (presumably a dict
    # of name -> node; verify against graph_util.query_label_to_dict).
    cities_by_name = graph_util.query_label_to_dict(graph, "City", "name")
    all_users = graph.nodes.match("User")

    print("Creating IS_FROM relationships..")
    # user['city'] is iterated, so it is expected to hold a collection of
    # city names rather than a single string.
    is_from_edges = [
        Relationship(member, "IS_FROM", cities_by_name[city_name])
        for member in tqdm(all_users)
        for city_name in member['city']
    ]
    print("Done.")

    edge_count = len(is_from_edges)
    print("Writing %d relationships.. " % edge_count)
    graph_util.create_in_batch(graph, is_from_edges)
    print("Done.")
Ejemplo n.º 2
0
def write_hydrated_tweets(graph, args):
    """Load the hydrated tweet dump and write one node per tweet.

    Reads ``../data/tweets_fully_hydrated.json``, converts every entry with
    ``create_hydrated_tweet`` and writes the resulting nodes through
    ``graph_util.create_in_batch``.

    Args:
        graph: Graph handle passed on to ``graph_util.create_in_batch``.
        args: Not used by this function.
    """
    print("Reading tweets..")
    with open("../data/tweets_fully_hydrated.json", mode='r',
              encoding='utf-8') as source:
        raw_tweets = json.load(source)
    print("Done.")

    print("Creating Tweet nodes..")
    nodes = []
    # The progress bar is advanced manually, once per converted tweet.
    with tqdm(total=len(raw_tweets)) as progress:
        for raw in raw_tweets:
            nodes.append(create_hydrated_tweet(raw))
            progress.update(1)
    print("Done.")

    node_count = len(nodes)
    print("Writing %d tweets to database.." % node_count)
    graph_util.create_in_batch(graph, nodes)
    print("Done.")
Ejemplo n.º 3
0
def write_users(graph, args):
    """Load the user dump and write one node per user record.

    Reads ``../data/users_with-loc_with-gender.json``, converts every entry
    with ``create_user_node`` and writes the resulting nodes through
    ``graph_util.create_in_batch``.

    Args:
        graph: Graph handle passed on to ``graph_util.create_in_batch``.
        args: Not used by this function.
    """
    print("Reading Userobjects..")
    with open("../data/users_with-loc_with-gender.json", mode='r',
              encoding='utf-8') as source:
        records = json.load(source)
    print("Read.")

    print("Creating User nodes..")
    nodes = [create_user_node(record) for record in tqdm(records)]
    print("Done.")

    node_count = len(nodes)
    print("Writing %d users to database.." % node_count)
    graph_util.create_in_batch(graph, nodes)
    print("Done.")
Ejemplo n.º 4
0
def write_tweets(graph, args):
    """Write tweet nodes from the pickled, POS-tagged tweet table.

    The function announces itself as deprecated on every call. It unpickles
    ``../../Datar/twitter/180524_data_posstagged.pickle``, replaces missing
    values with ``"N/A"``, converts every row with ``create_tweet_node`` and
    writes the nodes through ``graph_util.create_in_batch``.

    Args:
        graph: Graph handle passed on to ``graph_util.create_in_batch``.
        args: Not used by this function.
    """
    print("WARNING, DEPRECTED")

    print("Reading tweets..")
    # NOTE(review): unpickling executes arbitrary code from the file; only
    # safe as long as this pickle is a trusted, locally produced artifact.
    with open("../../Datar/twitter/180524_data_posstagged.pickle",
              mode='rb') as source:
        table = pickle.load(source)

    # The loaded object is used through fillna/iterrows, i.e. it is expected
    # to behave like a pandas DataFrame. Missing values are replaced because
    # they break the neo4j write.
    table.fillna("N/A", inplace=True)
    print("Done.")

    print("Creating Tweet nodes..")
    nodes = []
    # The progress bar is advanced manually, once per converted row.
    with tqdm(total=len(table)) as progress:
        for _, row in table.iterrows():
            nodes.append(create_tweet_node(row))
            progress.update(1)
    print("Done.")

    node_count = len(nodes)
    print("Writing %d tweets to database.." % node_count)
    graph_util.create_in_batch(graph, nodes)
    print("Done.")
Ejemplo n.º 5
0
def annotate_politicians(graph, args):
    """Create Party nodes and tag the politicians found among the User nodes.

    Reads ``../data/politician_user_handles.json``, which is iterated as a
    mapping of party name to a collection of screen names. One ``Party`` node
    is written per party. For every listed screen name that matches a
    ``User`` node, the node gets the extra label ``Politician`` and an
    ``IS_MEMBER_OF`` relationship to its party. Screen names without a
    matching ``User`` node are skipped.

    Args:
        graph: Graph handle; must expose ``nodes.match``, ``merge`` and
            ``push`` and is also passed on to ``graph_util.create_in_batch``.
        args: Not used by this function.
    """
    with open("../data/politician_user_handles.json",
              encoding='utf-8',
              mode='r') as f:
        handles_by_party = json.load(f)

    # Flip party -> handles into handle -> party for readable lookups, and
    # create the Party nodes along the way. A handle listed under several
    # parties ends up with the last one iterated.
    parties = {}
    politicians = {}
    for party, handles in handles_by_party.items():
        parties[party] = Node('Party', name=party)
        for handle in handles:
            politicians[handle] = party

    graph_util.create_in_batch(graph, list(parties.values()))
    print("Done.")

    print("Creating party membership relations..")

    relationships = []
    for handle, party in tqdm(politicians.items()):
        politician_node = graph.nodes.match("User").where(
            "_.screen_name = \"%s\"" % handle).first()

        if politician_node is None:
            # No User node carries this screen name; nothing to annotate.
            continue

        # Per the original author's note, the merge has to precede any
        # update of a matched node -- NOTE(review): confirm this is still
        # needed with the graph library version in use.
        graph.merge(politician_node)
        politician_node.add_label("Politician")
        graph.push(politician_node)

        relationships.append(
            Relationship(politician_node, "IS_MEMBER_OF", parties[party]))
    print("Done.")

    # The count has to be interpolated here; without it the message prints
    # a literal "%d".
    print("Writing %d party membership relations to database.."
          % len(relationships))
    graph_util.create_in_batch(graph, relationships)
    print("Done.")
Ejemplo n.º 6
0
def connect_tweets_users(graph, args):
    """Relate Tweet nodes to their authors and to the users they mention.

    For every Tweet node:

    * if its ``screen_name`` is a known user, add ``(user)-[TWEETED]->(tweet)``;
    * if its ``mentions`` property is a list, add
      ``(tweet)-[MENTIONS]->(user)`` for each mentioned known user.

    All relationships are collected first and then written in one call to
    ``graph_util.create_in_batch``.

    Args:
        graph: Graph handle; must expose ``nodes.match`` and is also passed
            on to the ``graph_util`` helpers.
        args: Not used by this function.
    """
    # Lookup keyed by the "screen_name" property of User nodes.
    users_by_handle = graph_util.query_label_to_dict(graph, "User",
                                                     "screen_name")
    all_tweets = graph.nodes.match("Tweet")
    edges = []

    print("Creating relationships between users via tweets..")

    for tweet in tqdm(all_tweets):
        author_handle = tweet['screen_name']
        if author_handle in users_by_handle:
            author = users_by_handle[author_handle]
            edges.append(Relationship(author, "TWEETED", tweet))

        mentioned_handles = tweet['mentions']
        if not isinstance(mentioned_handles, list):
            # Anything other than a list is treated as "no mentions".
            continue
        for handle in mentioned_handles:
            if handle not in users_by_handle:
                continue
            edges.append(
                Relationship(tweet, "MENTIONS", users_by_handle[handle]))
    print("Done.")

    # Authorship and mention relationships are written together.
    print("Writing %d relationships to database.." % len(edges))
    graph_util.create_in_batch(graph, edges)
    print("Done.")
Ejemplo n.º 7
0
def write_locations(graph, args):
    """Write State and City nodes plus the ``IS_IN`` links between them.

    City and state names are collected from the ``city`` and ``state``
    properties of all ``User`` nodes. One ``State`` node and one ``City``
    node is written per distinct name. Each city is then linked to its state
    through the city -> state table parsed from
    ``../data/german_cities_raw.txt``.

    Args:
        graph: Graph handle; must expose ``nodes.match`` and is also passed
            on to ``graph_util.create_in_batch``.
        args: Not used by this function.

    Raises:
        IndexError: If a line of the city file does not match both patterns.
        KeyError: If a collected city is missing from the city file, or its
            state is not among the states collected from the users.
    """

    def create_state_node_dict(states):
        """Return a dict mapping each distinct state name to a new State node."""
        return {state: Node("State", name=state) for state in set(states)}

    def load_city_mapping():
        """Parse the raw city file into a ``{city: state}`` dict.

        For each line, the city is everything up to the last space
        (stripped) and the state is the first two-character token in
        parentheses, stored without the parentheses.
        """
        city_mapping = {}

        # Raw strings: "\(" and "\w" are invalid escape sequences in a plain
        # string literal and trigger a warning on recent Python versions.
        find_state = re.compile(r"(\(\w{2}\))")
        find_city = re.compile(r"(.+ )")
        with open("../data/german_cities_raw.txt", encoding='utf-8',
                  mode='r') as f:
            for entry in f:
                city = find_city.findall(entry)[0].strip()
                state = find_state.findall(entry)[0].replace("(", "").replace(
                    ")", "")
                city_mapping[city] = state
        return city_mapping

    city_mapping = load_city_mapping()

    # Both properties are passed to list.extend, so each is expected to hold
    # a collection of names per user.
    cities = []
    states = []
    for user in graph.nodes.match("User"):
        cities.extend(user['city'])
        states.extend(user['state'])

    states = create_state_node_dict(states)
    print("Writing state nodes..")
    graph_util.create_in_batch(graph, list(states.values()))
    print("Done.")

    city_nodes = [Node("City", name=city) for city in set(cities)]
    print("Creating city nodes..")
    graph_util.create_in_batch(graph, city_nodes)
    print("Done.")

    # Resolve city name -> state code -> State node for every city node.
    city_to_states = [
        Relationship(city_node, "IS_IN",
                     states[city_mapping[city_node['name']]])
        for city_node in city_nodes
    ]
    print("Creating city to state relationships..")
    graph_util.create_in_batch(graph, city_to_states)
    print("Done.")