forked from openeventdata/phoenix_pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
geolocation.py
93 lines (73 loc) · 2.63 KB
/
geolocation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from __future__ import unicode_literals
from __future__ import print_function
import json
import requests
import utilities
from bson.objectid import ObjectId
def query_geotext(sentence, timeout=30):
    """
    Looks up the first location mentioned in a sentence using the GeoTxt
    web API (``stanfords`` method).

    The lookup is best-effort: an empty sentence, a failed or timed-out
    request, a non-OK HTTP status, a body that is not valid JSON, or a
    response without a usable first feature all produce three empty strings
    rather than an exception.

    Parameters
    ----------

    sentence: String.
                Text from which an event was coded.

    timeout: Number.
                Seconds to wait for the API before giving up. Defaults to 30.

    Returns
    -------

    lon: Longitude of the first feature, as it appears in the response, or
            an empty string when no location was found.

    lat: Latitude of the first feature, as it appears in the response, or
            an empty string when no location was found.

    name: The ``toponym`` property of the first feature, or an empty string
            when no location was found.
    """
    no_location = ('', '', '')

    # Nothing to geolocate, so do not spend a network round-trip on it.
    if not sentence:
        return no_location

    try:
        # Passing the text via ``params`` lets requests URL-encode it;
        # characters such as '&' or '#' in a sentence would otherwise
        # truncate or split the query string.
        query_out = requests.get('http://geotxt.org/api/1/geotxt.json',
                                 params={'m': 'stanfords', 'q': sentence},
                                 timeout=timeout)
    except requests.exceptions.RequestException as e:
        print('There was an error requesting geolocation. {}'.format(e))
        return no_location

    # A requests Response is falsy for 4xx/5xx status codes.
    if not query_out:
        return no_location

    try:
        geo_results = query_out.json()
    except ValueError as e:
        print('There was an error: {}. Status code: {}'.format(
            e, query_out.status_code))
        return no_location

    # The body may be valid JSON without being the expected feature
    # collection (e.g. an error object), so do not index it blindly.
    if isinstance(geo_results, dict):
        features = geo_results.get('features')
    else:
        features = None
    if not features:
        return no_location

    try:
        first = features[0]
        coordinates = first['geometry']['coordinates']
        # Index rather than unpack: a position may carry a third
        # (altitude) element after longitude and latitude.
        lon, lat = coordinates[0], coordinates[1]
        name = first['properties']['toponym']
    except (KeyError, IndexError, TypeError):
        return no_location

    return lon, lat, name
def main(events, file_details):
    """
    Adds location information to events by running ``query_geotext`` on the
    sentence each event was coded from.

    For every event, the first entry of its ``'ids'`` list is split on
    ``'_'`` into a database ID and a sentence index. The record with that ID
    is fetched, its ``'content'`` is split into sentences with
    ``utilities.sentence_segmenter``, and the indexed sentence is passed to
    ``query_geotext``. Events whose record or sentence cannot be found are
    reported and left without location information.

    Parameters
    ----------

    events: Dictionary.
            Contains filtered events from the one-a-day filter. Keys are
            (DATE, SOURCE, TARGET, EVENT) tuples, values are dictionaries
            that hold at least an ``'ids'`` list.

    file_details: Object.
                    Must expose ``auth_db``, ``auth_user`` and ``auth_pass``,
                    which are handed to ``utilities.make_conn``.

    Returns
    -------

    events: Dictionary.
            The same object that was passed in, modified in place. Each
            event for which a location was found gains a ``'geo'`` entry
            holding a tuple of the form (LON, LAT, NAME).
    """
    # Do not open a database connection when there is nothing to look up.
    if not events:
        return events

    # NOTE(review): presumably a MongoDB collection, given the
    # ``find_one``/``ObjectId`` usage below -- confirm against utilities.
    coll = utilities.make_conn(file_details.auth_db, file_details.auth_user,
                               file_details.auth_pass)

    for event in events:
        event_id, sentence_id = events[event]['ids'][0].split('_')

        result = coll.find_one({'_id': ObjectId(event_id)})
        if result is None:
            print('No record found for ID {}. Skipping geolocation.'.format(
                event_id))
            continue

        sents = utilities.sentence_segmenter(result['content'])
        try:
            query_text = sents[int(sentence_id)]
        except (IndexError, ValueError):
            print('No sentence {} in record {}. Skipping geolocation.'.format(
                sentence_id, event_id))
            continue

        lon, lat, name = query_geotext(query_text)
        # Compare against the empty-string sentinel instead of relying on
        # truthiness, so a coordinate of exactly 0 is still kept.
        if lat != '' and lon != '':
            events[event]['geo'] = (lon, lat, name)

    return events