def resync_tweets_pg_to_neo(self):
    """Replay every tweet returned by ``self.pg`` into ``self.neo``.

    The existing map is archived through ``self.neo.archive_map()`` before
    anything is written, and the archive is removed through
    ``self.neo.delete_archived_map()`` only after every tweet has been
    added. If an exception interrupts the run it propagates to the caller
    and the archive is left in place.

    Rows from ``self.pg.get_all_tweets()`` are grouped by ``record[0]``:
    the first row of a run builds a ``Tweet``; following rows that carry
    the same ``record[0]`` are merged into it with
    ``from_database_add_entities``.
    NOTE(review): this assumes rows for one tweet arrive consecutively
    (presumably the query orders by tweet id) -- confirm against
    ``get_all_tweets``.

    The cursor is always closed, including when iteration or a database
    write raises.
    """
    self.neo.archive_map()
    last_tweet_id = None
    current_tweet = None
    counter = 0
    cursor = self.pg.get_all_tweets()
    try:
        for record in cursor:
            if record[0] != last_tweet_id:
                # A new tweet starts here: flush the one assembled so far.
                if current_tweet:
                    self.neo.add_Tweet_to_database(current_tweet)
                # Progress log every 3000 tweets; it reports the tweet that
                # was just flushed (None on the very first row).
                if counter % 3000 == 0:
                    LOGGER.info("%s:%s " % (counter, current_tweet))
                current_tweet = Tweet(record, 'database')
                counter += 1
            else:
                # Same tweet as the previous row: merge this row's entities.
                current_tweet.from_database_add_entities(record)
            last_tweet_id = record[0]
    finally:
        # Release the cursor even when the loop body raises.
        cursor.close()
    # The last tweet is never followed by a "new id" row, so flush it here.
    if current_tweet:
        self.neo.add_Tweet_to_database(current_tweet)
    self.neo.delete_archived_map()
def test_parse_retweet(self):
    # Builds a Tweet from the status returned by self.api for a retweet and
    # checks each parsed attribute against the values recorded below.
    # Source: https://twitter.com/condnsdmatters/status/715175357658374148
    # Tweet: 715175357658374148 condnsdmatters || RC: 17 || FC: 0 ||
    #        RT: GOVUK || @ 1 || # 1 || Url 1
    # Content: For the latest on the floods, please follow @EnvAgency,
    #          #floodaware or visit GOV.UK: https://t.co/kZAdl7JvKb
    fetched = self.api.get_status(715175357658374148)
    parsed = Tweet(fetched, 'twitter')

    # (attribute, expected value) pairs, asserted in this order; the first
    # mismatch fails the test.
    expectations = [
        ('tweet_id', 715175357658374148),
        ('user_id', 701110092675031041),
        ('handle', 'condnsdmatters'),
        ('mentions', [(47331384, 'EnvAgency')]),
        ('content',
         'For the latest on the floods, please follow @EnvAgency, #floodaware or visit GOV.UK: https://t.co/kZAdl7JvKb'),
        ('is_retweet', True),
        ('retweet_status_id', 673864586982854657),
        ('retweeted_user', (17481977, 'GOVUK')),
        ('retweet_count', 17),
        ('favourite_count', 0),
        ('hashtags', ['floodaware']),
        ('date', datetime.datetime(2016, 3, 30, 13, 54, 27)),
        ('urls',
         ['https://www.gov.uk/prepare-for-a-flood/find-out-if-youre-at-risk']),
        ('in_reply_to_status_id', None),
        ('in_reply_to_user', None),
        ('is_reply', False),
    ]
    for attribute, expected in expectations:
        self.assertEqual(getattr(parsed, attribute), expected)
def test_parse_reply_on_other(self):
    # Builds a Tweet from the status returned by self.api for a reply to
    # another account's tweet and checks each parsed attribute.
    # Source: https://twitter.com/WhittakerTrevor/status/674157223334060032
    # Tweet: 674157223334060032 WhittakerTrevor || RC: 0 || FC: 0 ||
    #        RT: REPLY || @ 2 || # 0 || Url 0
    # Content: @GOVUK @EnvAgency STOP CUTTING GREEN SUBSIDIES WHAT YOU TRYING
    #          TO SAVE MONEY FOR MONEY YOU WONT NEED IT WHEN YOU HAVE
    #          DESYROYED THE EARTH
    fetched = self.api.get_status(674157223334060032)
    parsed = Tweet(fetched, 'twitter')

    # (attribute, expected value) pairs, asserted in this order; the first
    # mismatch fails the test.
    expectations = [
        ('tweet_id', 674157223334060032),
        ('user_id', 464109437),
        ('handle', 'WhittakerTrevor'),
        ('mentions', [(17481977, 'GOVUK'), (47331384, 'EnvAgency')]),
        ('content',
         '@GOVUK @EnvAgency STOP CUTTING GREEN SUBSIDIES WHAT YOU TRYING TO SAVE MONEY FOR MONEY YOU WONT NEED IT WHEN YOU HAVE DESYROYED THE EARTH'),
        ('is_retweet', False),
        ('retweet_status_id', 0),
        ('retweeted_user', None),
        ('retweet_count', 0),
        ('favourite_count', 0),
        ('hashtags', []),
        ('date', datetime.datetime(2015, 12, 8, 9, 23, 1)),
        ('urls', []),
        ('in_reply_to_status_id', 673864586982854657),
        ('in_reply_to_user', (17481977, 'GOVUK')),
        ('is_reply', True),
    ]
    for attribute, expected in expectations:
        self.assertEqual(getattr(parsed, attribute), expected)
def test_parse_mention_hash_link(self):
    # Builds a Tweet from the status returned by self.api for an original
    # tweet carrying one mention, one hashtag and one link, then checks each
    # parsed attribute.
    # Source: https://twitter.com/GOVUK/status/673864586982854657
    # Tweet: 673864586982854657 GOVUK || RC: 17 || FC: 9 || RT: None ||
    #        @ 1 || # 1 || Url 1
    # Content: For the latest on the floods, please follow @EnvAgency,
    #          #floodaware or visit GOV.UK: https://t.co/kZAdl7JvKb
    # NOTE(review): retweet_status_id is not asserted here although the
    # reply tests in this file do assert it -- confirm whether that is
    # deliberate.
    fetched = self.api.get_status(673864586982854657)
    parsed = Tweet(fetched, 'twitter')

    # (attribute, expected value) pairs, asserted in this order; the first
    # mismatch fails the test.
    expectations = [
        ('tweet_id', 673864586982854657),
        ('user_id', 17481977),
        ('handle', 'GOVUK'),
        ('mentions', [(47331384, 'EnvAgency')]),
        ('content',
         'For the latest on the floods, please follow @EnvAgency, #floodaware or visit GOV.UK: https://t.co/kZAdl7JvKb'),
        ('is_retweet', False),
        ('retweeted_user', None),
        ('retweet_count', 17),
        ('favourite_count', 9),
        ('hashtags', ['floodaware']),
        ('date', datetime.datetime(2015, 12, 7, 14, 0, 11)),
        ('urls',
         ['https://www.gov.uk/prepare-for-a-flood/find-out-if-youre-at-risk']),
        ('in_reply_to_status_id', None),
        ('in_reply_to_user', None),
        ('is_reply', False),
    ]
    for attribute, expected in expectations:
        self.assertEqual(getattr(parsed, attribute), expected)
def test_parse_reply_on_self(self):
    # Builds a Tweet from the status returned by self.api for a reply tweet
    # and checks each parsed attribute.
    # Source: https://twitter.com/annaturley/status/714936442384990208
    # Tweet: 714936442384990208 annaturley || RC: 2 || FC: 0 || RT: REPLY ||
    #        @ 1 || # 0 || Url 0
    # Content: @Opensout I want them to step in and take control of the
    #          site. I am just gutted they wouldn't consider this for Redcar.
    fetched = self.api.get_status(714936442384990208)
    parsed = Tweet(fetched, 'twitter')

    # (attribute, expected value) pairs, asserted in this order; the first
    # mismatch fails the test.
    expectations = [
        ('tweet_id', 714936442384990208),
        ('user_id', 22398060),
        ('handle', 'annaturley'),
        ('mentions', [(3002057294, u'Opensout')]),
        ('content',
         "@Opensout I want them to step in and take control of the site. I am just gutted they wouldn't consider this for Redcar."),
        ('is_retweet', False),
        ('retweet_status_id', 0),
        ('retweeted_user', None),
        ('retweet_count', 2),
        ('favourite_count', 0),
        ('hashtags', []),
        ('date', datetime.datetime(2016, 3, 29, 22, 5, 5)),
        ('urls', []),
        ('in_reply_to_status_id', 714935890695614466),
        ('in_reply_to_user', (3002057294, u'Opensout')),
        ('is_reply', True),
    ]
    for attribute, expected in expectations:
        self.assertEqual(getattr(parsed, attribute), expected)
def resync_tweets_pg_to_neo(self):
    """Replay every tweet returned by ``self.pg`` into ``self.neo``.

    The existing map is archived through ``self.neo.archive_map()`` before
    anything is written, and the archive is removed through
    ``self.neo.delete_archived_map()`` only after every tweet has been
    added. If an exception interrupts the run it propagates to the caller
    and the archive is left in place.

    Rows from ``self.pg.get_all_tweets()`` are grouped by ``record[0]``:
    the first row of a run builds a ``Tweet``; following rows that carry
    the same ``record[0]`` are merged into it with
    ``from_database_add_entities``.
    NOTE(review): this assumes rows for one tweet arrive consecutively
    (presumably the query orders by tweet id) -- confirm against
    ``get_all_tweets``.

    The cursor is always closed, including when iteration or a database
    write raises.
    """
    self.neo.archive_map()
    last_tweet_id = None
    current_tweet = None
    counter = 0
    cursor = self.pg.get_all_tweets()
    try:
        for record in cursor:
            if record[0] != last_tweet_id:
                # A new tweet starts here: flush the one assembled so far.
                if current_tweet:
                    self.neo.add_Tweet_to_database(current_tweet)
                # Progress log every 3000 tweets; it reports the tweet that
                # was just flushed (None on the very first row).
                if counter % 3000 == 0:
                    LOGGER.info("%s:%s " % (counter, current_tweet))
                current_tweet = Tweet(record, 'database')
                counter += 1
            else:
                # Same tweet as the previous row: merge this row's entities.
                current_tweet.from_database_add_entities(record)
            last_tweet_id = record[0]
    finally:
        # Release the cursor even when the loop body raises.
        cursor.close()
    # The last tweet is never followed by a "new id" row, so flush it here.
    if current_tweet:
        self.neo.add_Tweet_to_database(current_tweet)
    self.neo.delete_archived_map()
def test_parse_mention(self):
    # Builds a Tweet from the status returned by self.api for an original
    # tweet with a single mention and checks each parsed attribute.
    # Source: https://twitter.com/condnsdmatters/status/715174694958272513
    # Tweet: 715174694958272513 condnsdmatters || RC: 0 || FC: 0 ||
    #        RT: None || @ 1 || # 0 || Url 0
    # Content: platy loves @GOVUK
    # NOTE(review): retweet_status_id is not asserted here although the
    # reply tests in this file do assert it -- confirm whether that is
    # deliberate.
    fetched = self.api.get_status(715174694958272513)
    parsed = Tweet(fetched, 'twitter')

    # (attribute, expected value) pairs, asserted in this order; the first
    # mismatch fails the test.
    expectations = [
        ('tweet_id', 715174694958272513),
        ('user_id', 701110092675031041),
        ('handle', 'condnsdmatters'),
        ('mentions', [(17481977, 'GOVUK')]),
        ('content', 'platy loves @GOVUK'),
        ('is_retweet', False),
        ('retweeted_user', None),
        ('retweet_count', 0),
        ('favourite_count', 0),
        ('hashtags', []),
        ('date', datetime.datetime(2016, 3, 30, 13, 51, 49)),
        ('urls', []),
        ('in_reply_to_status_id', None),
        ('in_reply_to_user', None),
        ('is_reply', False),
    ]
    for attribute, expected in expectations:
        self.assertEqual(getattr(parsed, attribute), expected)
def test_add_Tweet_to_database__retweet(self):
    # Scenario: 'tinyhands' retweets a tweet by 'MBEyes' whose text mentions
    # 'Kdog', e.g. (retweet of MBE): "Hey @MBE, @Kdog".
    # Expected edges out of tinyhands:
    #   -> Kdog   : INDIRECT, counted as one mention (mention by proxy)
    #   -> MBEyes : DIRECT, counted as one retweet
    handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

    # Test data, assigned in this order. Fields marked "not stored here"
    # carry the original author's note that the handler does not persist
    # them.
    tweet = Tweet(None, 'test')
    fixture = [
        ('tweet_id', 1),
        ('user_id', 400000),
        ('handle', 'tinyhands'),
        ('mentions', [(300000, 'Kdog')]),
        ('content', 'Generic tweet @Kdog'),  # not stored here
        ('is_retweet', True),
        ('retweeted_user', (0, 'MBEyes')),
        ('retweet_status_id', 2),
        ('is_reply', False),
        ('in_reply_to_user', None),
        ('in_reply_to_status_id', None),
        ('retweet_count', 3),  # not stored here
        ('favourite_count', 4),  # not stored here
        ('hashtags', []),  # not stored here
        ('date', 'a date string'),
        ('urls', ['https://url.com/']),  # not stored here
        ('website_link', 'twitter.com/status/madeupstatus1'),
    ]
    for field, value in fixture:
        setattr(tweet, field, value)

    # Add to database
    handler.add_Tweet_to_database(tweet)

    # Preliminary check: exactly two outgoing relationships, sorted by the
    # target's name.
    query = """MATCH (a {handle:'tinyhands'})-[r]->(b) RETURN r, b.name ORDER BY b.name"""
    results = list(self.graph.cypher.execute(query))
    self.assertEqual(len(results), 2)

    # In depth check: (relationship type, target name, ordered properties)
    # for each returned row.
    expected_rows = [
        (u'INDIRECT', 'Kendog Lamar', [
            ("mentions", 1),
            ("mention_last", '1'),
            ("mention_date", 'a date string'),
            ("replies", 0),
            ("reply_last", ''),
            ("reply_date", ''),
            ("retweets", 0),
            ("retweet_last", ''),
            ("retweet_date", ''),
        ]),
        (u'DIRECT', 'Michael Blue Eyes', [
            ("mentions", 0),
            ("mention_last", ''),
            ("mention_date", ''),
            ("replies", 0),
            ("reply_last", ''),
            ("reply_date", ''),
            ("retweets", 1),
            ("retweet_last", '1'),
            ("retweet_date", 'a date string'),
        ]),
    ]
    for row, (rel_type, target_name, properties) in zip(results, expected_rows):
        relationship = row[0]
        self.assertEqual(relationship.type, rel_type)
        self.assertEqual(row[1], target_name)
        for key, expected in properties:
            self.assertEqual(relationship[key], expected)
def test_add_Tweet_to_database__reply(self):
    # Scenario: 'LRichy' replies to 'tBW' and the text mentions both 'tBW'
    # and 'tinyhands', e.g. (reply to tBW): "Hey @tBW, @tinyhands".
    # Expected edges out of LRichy (ignoring any edge to MBEyes):
    #   -> tBW       : DIRECT, counted as one reply and no mention
    #   -> tinyhands : DIRECT, counted as one mention
    handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

    # Test data, assigned in this order. Fields marked "not stored here"
    # carry the original author's note that the handler does not persist
    # them.
    tweet = Tweet(None, 'test')
    fixture = [
        ('tweet_id', 1),
        ('user_id', 100000),
        ('handle', 'LRichy'),
        ('mentions', [(400000, 'tinyhands'), (200000, 'tBW')]),
        ('content', 'Generic tweet @tinyhands @tBW'),  # not stored here
        ('is_retweet', False),
        ('retweeted_user', None),
        ('retweet_status_id', 0),
        ('is_reply', True),
        ('in_reply_to_user', (200000, 'tBW')),
        ('in_reply_to_status_id', 2),
        ('retweet_count', 3),  # not stored here
        ('favourite_count', 4),  # not stored here
        ('hashtags', ['clothes']),  # not stored here
        ('date', 'a date string'),
        ('urls', ['https://url.com/']),  # not stored here
        ('website_link', 'twitter.com/status/madeupstatus1'),
    ]
    for field, value in fixture:
        setattr(tweet, field, value)

    # Add to database
    handler.add_Tweet_to_database(tweet)

    # Preliminary check: exactly two outgoing relationships once MBEyes is
    # filtered out, sorted by the target's name.
    query = """MATCH (a {handle:'LRichy'})-[r]->(b) WHERE b.handle<>'MBEyes' RETURN r, b.name ORDER BY b.name"""
    results = list(self.graph.cypher.execute(query))
    self.assertEqual(len(results), 2)

    # In depth check: (relationship type, target name, ordered properties)
    # for each returned row.
    expected_rows = [
        (u'DIRECT', 'The Boy Wonder', [
            ("mentions", 0),
            ("mention_last", ''),
            ("mention_date", ''),
            ("replies", 1),
            ("reply_last", '1'),
            ("reply_date", 'a date string'),
            ("retweets", 0),
            ("retweet_last", ''),
            ("retweet_date", ''),
        ]),
        (u'DIRECT', 'Tiny Hands', [
            ("mentions", 1),
            ("mention_last", '1'),
            ("mention_date", 'a date string'),
            ("replies", 0),
            ("reply_last", ''),
            ("reply_date", ''),
            ("retweets", 0),
            ("retweet_last", ''),
            ("retweet_date", ''),
        ]),
    ]
    for row, (rel_type, target_name, properties) in zip(results, expected_rows):
        relationship = row[0]
        self.assertEqual(relationship.type, rel_type)
        self.assertEqual(row[1], target_name)
        for key, expected in properties:
            self.assertEqual(relationship[key], expected)
def test_add_Tweet_to_database__mention(self):
    # Scenario: 'LRichy' posts a plain tweet that mentions 'tinyhands'
    # ("Hey @tinyhands"). Expected: a single DIRECT edge
    # LRichy -> tinyhands counted as one mention.
    handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

    # Test data, assigned in this order. Fields marked "not stored here"
    # carry the original author's note that the handler does not persist
    # them.
    tweet = Tweet(None, 'test')
    fixture = [
        ('tweet_id', 1),
        ('user_id', 100000),
        ('handle', 'LRichy'),
        ('mentions', [(400000, 'tinyhands')]),
        ('content', 'Generic tweet @tinyhands'),  # not stored here
        ('is_retweet', False),
        ('retweeted_user', None),
        ('retweet_status_id', 0),
        ('is_reply', False),
        ('in_reply_to_user', None),
        ('in_reply_to_status_id', None),
        ('retweet_count', 3),  # not stored here
        ('favourite_count', 4),  # not stored here
        ('hashtags', ['clothes']),  # not stored here
        ('date', 'a date string'),
        ('urls', ['https://url.com']),  # not stored here
        ('website_link', 'twitter.com/status/madeupstatus1'),
    ]
    for field, value in fixture:
        setattr(tweet, field, value)

    # Add to database
    handler.add_Tweet_to_database(tweet)

    # Preliminary check: exactly one relationship between the two users.
    query = """MATCH (a {handle:'LRichy'})-[r]->(b {handle:'tinyhands'}) RETURN r"""
    results = list(self.graph.cypher.execute(query))
    self.assertEqual(len(results), 1)
    edge = results[0][0]

    # In depth check: relationship type first, then each property in order.
    self.assertEqual(edge.type, u'DIRECT')
    expected_properties = [
        ("mentions", 1),
        ("mention_last", '1'),
        ("mention_date", 'a date string'),
        ("replies", 0),
        ("reply_last", ''),
        ("reply_date", ''),
        ("retweets", 0),
        ("retweet_last", ''),
        ("retweet_date", ''),
    ]
    for key, expected in expected_properties:
        self.assertEqual(edge[key], expected)
def test_add_Tweet_to_database__retweet(self):
    # Scenario: 'tinyhands' retweets a tweet by 'MBEyes' whose text mentions
    # 'Kdog', e.g. (retweet of MBE): "Hey @MBE, @Kdog".
    # Expected edges out of tinyhands:
    #   -> Kdog   : INDIRECT, counted as one mention (mention by proxy)
    #   -> MBEyes : DIRECT, counted as one retweet
    handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

    # Test data, assigned in this order. Fields marked "not stored here"
    # carry the original author's note that the handler does not persist
    # them.
    tweet = Tweet(None, 'test')
    fixture = [
        ('tweet_id', 1),
        ('user_id', 400000),
        ('handle', 'tinyhands'),
        ('mentions', [(300000, 'Kdog')]),
        ('content', 'Generic tweet @Kdog'),  # not stored here
        ('is_retweet', True),
        ('retweeted_user', (0, 'MBEyes')),
        ('retweet_status_id', 2),
        ('is_reply', False),
        ('in_reply_to_user', None),
        ('in_reply_to_status_id', None),
        ('retweet_count', 3),  # not stored here
        ('favourite_count', 4),  # not stored here
        ('hashtags', []),  # not stored here
        ('date', 'a date string'),
        ('urls', ['https://url.com/']),  # not stored here
        ('website_link', 'twitter.com/status/madeupstatus1'),
    ]
    for field, value in fixture:
        setattr(tweet, field, value)

    # Add to database
    handler.add_Tweet_to_database(tweet)

    # Preliminary check: exactly two outgoing relationships, sorted by the
    # target's name.
    query = """MATCH (a {handle:'tinyhands'})-[r]->(b) RETURN r, b.name ORDER BY b.name"""
    results = list(self.graph.cypher.execute(query))
    self.assertEqual(len(results), 2)

    # In depth check: (relationship type, target name, ordered properties)
    # for each returned row.
    expected_rows = [
        (u'INDIRECT', 'Kendog Lamar', [
            ("mentions", 1),
            ("mention_last", '1'),
            ("mention_date", 'a date string'),
            ("replies", 0),
            ("reply_last", ''),
            ("reply_date", ''),
            ("retweets", 0),
            ("retweet_last", ''),
            ("retweet_date", ''),
        ]),
        (u'DIRECT', 'Michael Blue Eyes', [
            ("mentions", 0),
            ("mention_last", ''),
            ("mention_date", ''),
            ("replies", 0),
            ("reply_last", ''),
            ("reply_date", ''),
            ("retweets", 1),
            ("retweet_last", '1'),
            ("retweet_date", 'a date string'),
        ]),
    ]
    for row, (rel_type, target_name, properties) in zip(results, expected_rows):
        relationship = row[0]
        self.assertEqual(relationship.type, rel_type)
        self.assertEqual(row[1], target_name)
        for key, expected in properties:
            self.assertEqual(relationship[key], expected)
def test_add_Tweet_to_database__reply(self):
    # Scenario: 'LRichy' replies to 'tBW' and the text mentions both 'tBW'
    # and 'tinyhands', e.g. (reply to tBW): "Hey @tBW, @tinyhands".
    # Expected edges out of LRichy (ignoring any edge to MBEyes):
    #   -> tBW       : DIRECT, counted as one reply and no mention
    #   -> tinyhands : DIRECT, counted as one mention
    handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

    # Test data, assigned in this order. Fields marked "not stored here"
    # carry the original author's note that the handler does not persist
    # them.
    tweet = Tweet(None, 'test')
    fixture = [
        ('tweet_id', 1),
        ('user_id', 100000),
        ('handle', 'LRichy'),
        ('mentions', [(400000, 'tinyhands'), (200000, 'tBW')]),
        ('content', 'Generic tweet @tinyhands @tBW'),  # not stored here
        ('is_retweet', False),
        ('retweeted_user', None),
        ('retweet_status_id', 0),
        ('is_reply', True),
        ('in_reply_to_user', (200000, 'tBW')),
        ('in_reply_to_status_id', 2),
        ('retweet_count', 3),  # not stored here
        ('favourite_count', 4),  # not stored here
        ('hashtags', ['clothes']),  # not stored here
        ('date', 'a date string'),
        ('urls', ['https://url.com/']),  # not stored here
        ('website_link', 'twitter.com/status/madeupstatus1'),
    ]
    for field, value in fixture:
        setattr(tweet, field, value)

    # Add to database
    handler.add_Tweet_to_database(tweet)

    # Preliminary check: exactly two outgoing relationships once MBEyes is
    # filtered out, sorted by the target's name.
    query = """MATCH (a {handle:'LRichy'})-[r]->(b) WHERE b.handle<>'MBEyes' RETURN r, b.name ORDER BY b.name"""
    results = list(self.graph.cypher.execute(query))
    self.assertEqual(len(results), 2)

    # In depth check: (relationship type, target name, ordered properties)
    # for each returned row.
    expected_rows = [
        (u'DIRECT', 'The Boy Wonder', [
            ("mentions", 0),
            ("mention_last", ''),
            ("mention_date", ''),
            ("replies", 1),
            ("reply_last", '1'),
            ("reply_date", 'a date string'),
            ("retweets", 0),
            ("retweet_last", ''),
            ("retweet_date", ''),
        ]),
        (u'DIRECT', 'Tiny Hands', [
            ("mentions", 1),
            ("mention_last", '1'),
            ("mention_date", 'a date string'),
            ("replies", 0),
            ("reply_last", ''),
            ("reply_date", ''),
            ("retweets", 0),
            ("retweet_last", ''),
            ("retweet_date", ''),
        ]),
    ]
    for row, (rel_type, target_name, properties) in zip(results, expected_rows):
        relationship = row[0]
        self.assertEqual(relationship.type, rel_type)
        self.assertEqual(row[1], target_name)
        for key, expected in properties:
            self.assertEqual(relationship[key], expected)
def test_add_Tweet_to_database__mention(self):
    # Scenario: 'LRichy' posts a plain tweet that mentions 'tinyhands'
    # ("Hey @tinyhands"). Expected: a single DIRECT edge
    # LRichy -> tinyhands counted as one mention.
    handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

    # Test data, assigned in this order. Fields marked "not stored here"
    # carry the original author's note that the handler does not persist
    # them.
    tweet = Tweet(None, 'test')
    fixture = [
        ('tweet_id', 1),
        ('user_id', 100000),
        ('handle', 'LRichy'),
        ('mentions', [(400000, 'tinyhands')]),
        ('content', 'Generic tweet @tinyhands'),  # not stored here
        ('is_retweet', False),
        ('retweeted_user', None),
        ('retweet_status_id', 0),
        ('is_reply', False),
        ('in_reply_to_user', None),
        ('in_reply_to_status_id', None),
        ('retweet_count', 3),  # not stored here
        ('favourite_count', 4),  # not stored here
        ('hashtags', ['clothes']),  # not stored here
        ('date', 'a date string'),
        ('urls', ['https://url.com']),  # not stored here
        ('website_link', 'twitter.com/status/madeupstatus1'),
    ]
    for field, value in fixture:
        setattr(tweet, field, value)

    # Add to database
    handler.add_Tweet_to_database(tweet)

    # Preliminary check: exactly one relationship between the two users.
    query = """MATCH (a {handle:'LRichy'})-[r]->(b {handle:'tinyhands'}) RETURN r"""
    results = list(self.graph.cypher.execute(query))
    self.assertEqual(len(results), 1)
    edge = results[0][0]

    # In depth check: relationship type first, then each property in order.
    self.assertEqual(edge.type, u'DIRECT')
    expected_properties = [
        ("mentions", 1),
        ("mention_last", '1'),
        ("mention_date", 'a date string'),
        ("replies", 0),
        ("reply_last", ''),
        ("reply_date", ''),
        ("retweets", 0),
        ("retweet_last", ''),
        ("retweet_date", ''),
    ]
    for key, expected in expected_properties:
        self.assertEqual(edge[key], expected)