# Build the SSE stream URL. If we have a timestamp for the most recent
# stored edit, resume from it via `?since=`; otherwise start from the
# live head of the stream.
if latest_datetime[0]:
    latest_date_formatted = latest_datetime[0].strftime('%Y-%m-%dT%H:%M:%SZ')
    url = base_stream_url + '?since={date}'.format(
        date=latest_date_formatted)
else:
    url = base_stream_url

# CLI override: `nohistorical` forces a live-only stream (no backfill),
# discarding any `?since=` resume point computed above.
if len(sys.argv) > 1 and sys.argv[1] == 'nohistorical':
    url = base_stream_url

for event in EventSource(url):
    # Only `message` events carry change payloads; ignore everything else.
    if event.event != 'message':
        continue

    try:
        change = json.loads(event.data)
    except ValueError:
        # Keep-alive / malformed payload — skip it and stay on the stream.
        continue

    hashtag_matches = hashtag_match(change['comment'])
    if not hashtag_matches or not valid_edit(change):
        continue

    # Defensive: a payload may lack the recent-changes ID. Without this
    # guard, change['id'] below raises KeyError and kills the stream loop.
    if 'id' not in change:
        print("Couldn't find recent changes ID in data. Skipping.")
        continue

    for hashtag in hashtag_matches:
        if db.is_duplicate(hashtag, change['id']):
            print("Skipped duplicate {hashtag} ({id})".format(
                hashtag=hashtag, id=change['id']))
            continue
        if not valid_hashtag(hashtag):
            continue
        # Truncate over-long edit summaries so they fit the DB column.
        # NOTE(review): the > 800 check truncating to 799 chars is the
        # original behavior, preserved as-is — confirm the intended limit.
        if len(change['comment']) > 800:
            change['comment'] = change['comment'][:799]
        db.insert_db(hashtag, change)
retry=300000, # The timeout argument gets passed to requests.get. # An integer value sets connect (socket connect) and # read (time to first byte / since last byte) timeout values. # A tuple value sets each respective value independently. # https://requests.readthedocs.io/en/latest/user/advanced/#timeouts timeout=(3.05, 30)): if event.event == 'message': try: change = json.loads(event.data) except ValueError: continue hashtag_matches = hashtag_match(change['comment']) if hashtag_matches and valid_edit(change): for hashtag in hashtag_matches: if 'id' not in change: print("Couldn't find recent changes ID in data. Skipping.") continue if db.is_duplicate(hashtag, change['id']): print("Skipped duplicate {hashtag} (rc_id = {id})".format( hashtag=hashtag, id=change['id'])) continue if not valid_hashtag(hashtag): continue # Check edit_summary length, truncate if necessary if len(change['comment']) > 800: change['comment'] = change['comment'][:799] populate_media_information(change) db.insert_db(hashtag, change)