def check_text_for_location(text):
    """Geocode the words of a tweet and collect every hit.

    Each whitespace-separated word that is not in ``BLACKLIST`` and does not
    contain ``'@'`` (user mentions) is passed to ``get_coordinates``.

    Args:
        text: The tweet body to scan.

    Returns:
        A list with the non-``None`` results of ``get_coordinates`` in word
        order, or ``None`` when no word could be resolved. When
        ``RETURN_ON_FIRST`` is truthy the list holds only the first hit.
    """
    coords_array = []  # Open question from the author: what to do with multiple hits?

    # split() without an argument (instead of split(' ')) drops the empty
    # tokens produced by repeated/leading/trailing spaces, so no empty query
    # is sent to the geocoder, and it also separates words on newlines/tabs.
    for word in text.split():
        if word in BLACKLIST or '@' in word:
            continue

        # Per the original comment: returns [lat, lng] or None if not found.
        coords = get_coordinates(word)
        if coords is None:
            continue

        coords_array.append(coords)
        if RETURN_ON_FIRST:
            return coords_array

    # None (not an empty list) signals "no useful location in the text".
    return coords_array or None
def check_text_for_location(text):
    """Geocode the words of a tweet and collect every hit.

    Each whitespace-separated word that is not in ``BLACKLIST`` and does not
    contain ``'@'`` (user mentions) is passed to ``get_coordinates``.

    Args:
        text: The tweet body to scan.

    Returns:
        A list with the non-``None`` results of ``get_coordinates`` in word
        order, or ``None`` when no word could be resolved. When
        ``RETURN_ON_FIRST`` is truthy the list holds only the first hit.
    """
    coords_array = []  # Open question from the author: what to do with multiple hits?

    # split() without an argument (instead of split(' ')) drops the empty
    # tokens produced by repeated/leading/trailing spaces, so no empty query
    # is sent to the geocoder, and it also separates words on newlines/tabs.
    for word in text.split():
        if word in BLACKLIST or '@' in word:
            continue

        # Per the original comment: returns [lat, lng] or None if not found.
        coords = get_coordinates(word)
        if coords is None:
            continue

        coords_array.append(coords)
        if RETURN_ON_FIRST:
            return coords_array

    # None (not an empty list) signals "no useful location in the text".
    return coords_array or None
def create_db(self, input_file):
    """Import the place names in ``input_file`` into this object's SQLite3 DB.

    Creates the ``place_names`` table in the database at ``self.db_name``,
    reads ``input_file`` line by line, looks up each stripped line with
    ``get_coordinates`` and inserts all ``(place, coordinates)`` pairs in one
    transaction. Progress checkpoints ("CP n") and a summary are printed.

    Args:
        input_file: Path of a file containing one place name per line.

    Returns:
        0 on success, -1 if anything failed (including the table already
        existing or the database being unopenable). On failure the
        transaction is rolled back.
    """
    # Wall-clock time: the summary reports elapsed seconds, and time.clock()
    # measured CPU time on Unix and no longer exists in Python 3.8+.
    start_time = time.time()
    lines = 0
    ret_code = -1
    conn = None  # stays None if connect() itself fails

    try:
        print("\n\n\nCP 0")
        conn = sqlite3.connect(self.db_name)
        c = conn.cursor()
        print("CP 1")
        c.execute('''CREATE TABLE place_names (id TEXT, coordinates TEXT)''')
        print("CP 2")

        rows = []
        with open(input_file, "rb") as in_file:
            for line in in_file:
                place = line.strip()
                print(place)
                # NOTE(review): if get_coordinates returns a list, sqlite3
                # presumably cannot bind it to a TEXT column as-is - confirm
                # what type is returned and serialise it if needed.
                coordinates = get_coordinates(place)
                rows.append((place, coordinates))
                lines += 1
        print("CP 3")

        c.executemany("INSERT INTO place_names VALUES (?,?)", rows)
        print("CP 4")
        conn.commit()
        print("CP 5")
        ret_code = 0
    except Exception:
        print("ERROR BUILDING DATABASE!\n")
        # Only roll back when a connection was actually opened; previously a
        # failed connect() made this handler itself raise on the unbound name.
        if conn is not None:
            conn.rollback()
    finally:
        # Release the connection on both the success and the failure path.
        if conn is not None:
            conn.close()

    elapsed_time = time.time() - start_time
    print("Time elapsed: {} seconds".format(elapsed_time))
    print("Read {} lines".format(lines))
    return ret_code
def create_db(self, input_file):
    """Import the place names in ``input_file`` into this object's SQLite3 DB.

    Creates the ``place_names`` table in the database at ``self.db_name``,
    reads ``input_file`` line by line, looks up each stripped line with
    ``get_coordinates`` and inserts all ``(place, coordinates)`` pairs in one
    transaction. Progress checkpoints ("CP n") and a summary are printed.

    Args:
        input_file: Path of a file containing one place name per line.

    Returns:
        0 on success, -1 if anything failed (including the table already
        existing or the database being unopenable). On failure the
        transaction is rolled back.
    """
    # Wall-clock time: the summary reports elapsed seconds, and time.clock()
    # measured CPU time on Unix and no longer exists in Python 3.8+.
    start_time = time.time()
    lines = 0
    ret_code = -1
    conn = None  # stays None if connect() itself fails

    try:
        print("\n\n\nCP 0")
        conn = sqlite3.connect(self.db_name)
        c = conn.cursor()
        print("CP 1")
        c.execute('''CREATE TABLE place_names (id TEXT, coordinates TEXT)''')
        print("CP 2")

        rows = []
        with open(input_file, "rb") as in_file:
            for line in in_file:
                place = line.strip()
                print(place)
                # NOTE(review): if get_coordinates returns a list, sqlite3
                # presumably cannot bind it to a TEXT column as-is - confirm
                # what type is returned and serialise it if needed.
                coordinates = get_coordinates(place)
                rows.append((place, coordinates))
                lines += 1
        print("CP 3")

        c.executemany("INSERT INTO place_names VALUES (?,?)", rows)
        print("CP 4")
        conn.commit()
        print("CP 5")
        ret_code = 0
    except Exception:
        print("ERROR BUILDING DATABASE!\n")
        # Only roll back when a connection was actually opened; previously a
        # failed connect() made this handler itself raise on the unbound name.
        if conn is not None:
            conn.rollback()
    finally:
        # Release the connection on both the success and the failure path.
        if conn is not None:
            conn.close()

    elapsed_time = time.time() - start_time
    print("Time elapsed: {} seconds".format(elapsed_time))
    print("Read {} lines".format(lines))
    return ret_code
def _tweet_properties(tweet):
    """Collect the tweet/user fields copied into every feature's properties."""
    user = tweet["user"]
    entities = tweet["entities"]
    return {
        "name": user["screen_name"],
        "user_description": user["description"],
        "user_img": user["profile_image_url"],
        "place": tweet["place"],
        "user_place": user["location"],
        "default_profile": user["default_profile"],
        "followers_count": user["followers_count"],
        "verified": user["verified"],
        "lang": user["lang"],
        "tweet_body": tweet["text"],
        "time": tweet["created_at"],
        "favorite_count": tweet["favorite_count"],
        "retweeted": tweet["retweeted"],
        "in_reply_to_user_id_str": tweet["in_reply_to_user_id_str"],
        "in_reply_to_status_id_str": tweet["in_reply_to_status_id_str"],
        "possibly_sensitive": tweet["possibly_sensitive"],
        "hashtags": entities["hashtags"],
        "symbols": entities["symbols"],
        "user_mentions": entities["user_mentions"],
        "urls": entities["urls"],
    }


def _tweet_feature(tweet, geometry_type, coordinates):
    """Build one GeoJSON Feature dict; LineStrings also get the blue style."""
    feature = {
        "type": "Feature",
        "geometry": {
            "type": geometry_type,
            "coordinates": coordinates,
        },
        "properties": _tweet_properties(tweet),
    }
    if geometry_type == "LineString":
        feature["style"] = {"fill": "blue"}
    return feature


def format2geoJSON(tweet):
    """Convert a tweet dict into a GeoJSON Feature dict, or ``None``.

    The tweet's own location is taken from the first available source:
    ``tweet["coordinates"]``, then ``place_lookup(tweet)`` when
    ``tweet["place"]`` is set, then ``get_coordinates`` on the user's profile
    location. When ``DOUBLE_CHECK`` is truthy the tweet text is additionally
    scanned with ``check_text_for_location``. The combinations are:

      1./3. own location AND location(s) in the text -> LineString through
            the text locations, ending at the own location
      2./4. own location, nothing in the text         -> Point at own location
      5.    no own location, location(s) in the text  -> Point at first hit
      6.    no location information at all            -> None

    Args:
        tweet: A decoded tweet. Every key read here must be present; a
            missing key makes the whole tweet be dropped (returns ``None``).

    Returns:
        A dict with ``type``, ``geometry`` and ``properties`` (plus ``style``
        for LineStrings), or ``None`` if the tweet carries no usable location
        or could not be processed.
    """
    try:
        if tweet["coordinates"] is not None:
            # Twitter already did the job.
            origin = tweet["coordinates"]
        elif tweet["place"] is not None:
            origin = place_lookup(tweet)
        elif tweet["user"]["location"] is not None:
            # May be None when the profile location cannot be geocoded; the
            # tweet is then treated like one without an own location instead
            # of emitting a geometry with null coordinates.
            origin = get_coordinates(tweet["user"]["location"])
        else:
            origin = None

        # With DOUBLE_CHECK off the text is not scanned, but a known own
        # location is still emitted (previously nothing was returned at all).
        if DOUBLE_CHECK:
            mentioned = check_text_for_location(tweet["text"])
        else:
            mentioned = None

        if origin is None:
            if mentioned is None:
                return None  # (6)
            # (5) - could also be a LineString over the entire array.
            return _tweet_feature(tweet, "Point", mentioned[0])

        if mentioned is None:
            return _tweet_feature(tweet, "Point", origin)  # (2) / (4)

        # (1) / (3): build the line as a new list. list.append() returns
        # None, so using its result left the geometry without coordinates.
        return _tweet_feature(tweet, "LineString", list(mentioned) + [origin])
    except Exception:
        # Deliberately best-effort: don't fail on malformed tweets.
        return None
def _tweet_properties(tweet):
    """Collect the tweet/user fields copied into every feature's properties."""
    user = tweet["user"]
    entities = tweet["entities"]
    return {
        "name": user["screen_name"],
        "user_description": user["description"],
        "user_img": user["profile_image_url"],
        "place": tweet["place"],
        "user_place": user["location"],
        "default_profile": user["default_profile"],
        "followers_count": user["followers_count"],
        "verified": user["verified"],
        "lang": user["lang"],
        "tweet_body": tweet["text"],
        "time": tweet["created_at"],
        "favorite_count": tweet["favorite_count"],
        "retweeted": tweet["retweeted"],
        "in_reply_to_user_id_str": tweet["in_reply_to_user_id_str"],
        "in_reply_to_status_id_str": tweet["in_reply_to_status_id_str"],
        "possibly_sensitive": tweet["possibly_sensitive"],
        "hashtags": entities["hashtags"],
        "symbols": entities["symbols"],
        "user_mentions": entities["user_mentions"],
        "urls": entities["urls"],
    }


def _tweet_feature(tweet, geometry_type, coordinates):
    """Build one GeoJSON Feature dict; LineStrings also get the blue style."""
    feature = {
        "type": "Feature",
        "geometry": {
            "type": geometry_type,
            "coordinates": coordinates,
        },
        "properties": _tweet_properties(tweet),
    }
    if geometry_type == "LineString":
        feature["style"] = {"fill": "blue"}
    return feature


def format2geoJSON(tweet):
    """Convert a tweet dict into a GeoJSON Feature dict, or ``None``.

    The tweet's own location is taken from the first available source:
    ``tweet["coordinates"]``, then ``place_lookup(tweet)`` when
    ``tweet["place"]`` is set, then ``get_coordinates`` on the user's profile
    location. When ``DOUBLE_CHECK`` is truthy the tweet text is additionally
    scanned with ``check_text_for_location``. The combinations are:

      1./3. own location AND location(s) in the text -> LineString through
            the text locations, ending at the own location
      2./4. own location, nothing in the text         -> Point at own location
      5.    no own location, location(s) in the text  -> Point at first hit
      6.    no location information at all            -> None

    Args:
        tweet: A decoded tweet. Every key read here must be present; a
            missing key makes the whole tweet be dropped (returns ``None``).

    Returns:
        A dict with ``type``, ``geometry`` and ``properties`` (plus ``style``
        for LineStrings), or ``None`` if the tweet carries no usable location
        or could not be processed.
    """
    try:
        if tweet["coordinates"] is not None:
            # Twitter already did the job.
            origin = tweet["coordinates"]
        elif tweet["place"] is not None:
            origin = place_lookup(tweet)
        elif tweet["user"]["location"] is not None:
            # May be None when the profile location cannot be geocoded; the
            # tweet is then treated like one without an own location instead
            # of emitting a geometry with null coordinates.
            origin = get_coordinates(tweet["user"]["location"])
        else:
            origin = None

        # With DOUBLE_CHECK off the text is not scanned, but a known own
        # location is still emitted (previously nothing was returned at all).
        if DOUBLE_CHECK:
            mentioned = check_text_for_location(tweet["text"])
        else:
            mentioned = None

        if origin is None:
            if mentioned is None:
                return None  # (6)
            # (5) - could also be a LineString over the entire array.
            return _tweet_feature(tweet, "Point", mentioned[0])

        if mentioned is None:
            return _tweet_feature(tweet, "Point", origin)  # (2) / (4)

        # (1) / (3): build the line as a new list. list.append() returns
        # None, so using its result left the geometry without coordinates.
        return _tweet_feature(tweet, "LineString", list(mentioned) + [origin])
    except Exception:
        # Deliberately best-effort: don't fail on malformed tweets.
        return None
"tweet_body": "I freaking love maps!", } } http://geojson.org/geojson-spec.html#bounding-boxes """ from getLocation import get_coordinates def check_text_for_location(text): # Search the tweet for possible geolocations by checkin if google knows the string: coords_array = [] # What to do when we find multiple? # Maybe have a White and black list? Often RT, USGS or simialr are icluded but not useful... for #TODO query = None coords = get_coordinates(query) # Returns [lat, lng] or None if not found if coords != None: # return coords if we find sth: # return coords coords_array.append(coords) # otherwise return None if you don't find anything return None def place_lookup(tweet): # https://twittercommunity.com/t/schema-of-boundingbox-in-places-section/8663 boundingBox = tweet["place"]["bounding_box"]["coordinates"][0] lat = float((boundingBox[0][0] + boundingBox[1][0]) / 2.0) lng = float((boundingBox[1][1] + boundingBox[2][1]) / 2.0) return [lat, lng]