forked from adammck/rapidsms-example-project
-
Notifications
You must be signed in to change notification settings - Fork 1
/
post.py
485 lines (435 loc) · 20.2 KB
/
post.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
#!/usr/bin/env python
# vim: ai ts=4 sts=4 et sw=4 coding=utf-8
import re
import itertools
import copy
import datetime
from exceptions import StopIteration
from django.contrib.contenttypes.models import ContentType
from rapidsms.models import Contact
from rapidsms.contrib.messagelog.models import Message
from edusupply.models import School
from edusupply.models import District
from edusupply.models import Province
from edusupply.models import Country
from edusupply.models import Confirmation
from logistics.models import Facility
from logistics.models import Campaign
from logistics.models import Commodity
from logistics.models import Cargo
from logistics.models import Shipment
from logistics.models import ShipmentSighting
from logistics.models import ShipmentRoute
def consume_in_reverse(list):
    """Yield the items of ``list`` from last to first, emptying it as it goes.

    Each item is removed from the caller's list with ``pop()`` at the moment
    it is yielded, so the list is empty once the generator is exhausted.
    The parameter name shadows the builtin ``list``; it is kept unchanged so
    existing callers are unaffected.
    """
    while True:
        # stop as soon as nothing is left to hand out
        if len(list) == 0:
            return
        yield list.pop()
def letters_for_numbers(str):
    """Return ``str`` lower-cased, with look-alike letters turned into digits.

    The letters ``i`` and ``l`` become ``1`` and ``o`` becomes ``0``, which
    repairs codes typed as e.g. ``II36`` or ``I64O``. Because the whole
    string is lower-cased first, any other letters come back in lower case.

    The parameter name shadows the builtin ``str``; it is kept unchanged so
    existing callers are unaffected.
    """
    # letters and the numerals they were intended to be
    gaffes = {'i': '1', 'l': '1', 'o': '0'}
    # don't worry about case
    numeralized = str.lower()
    # The substitutions are independent of each other (no replacement output
    # is itself a key), so dictionary iteration order does not matter.
    # str.replace cannot fail here, so no exception handling is needed.
    for letter, numeral in gaffes.items():
        numeralized = numeralized.replace(letter, numeral)
    return numeralized
def reconcile_condition(token):
    """Map a token to a one-letter condition code, or None.

    A single letter is accepted (case-insensitively) when it is one of
    G, I, D or L. A longer, purely alphabetic token is accepted when it
    spells GOOD, INCOMPLETE, DAMAGED, ALTERNATE or LOCATION (the last two
    both map to "L"). Anything else, including tokens containing
    non-letters and the empty string, yields None.
    """
    if not token.isalpha():
        return None

    shouted = token.upper()

    if len(token) == 1:
        return shouted if shouted in ("G", "I", "D", "L") else None

    # spelled-out conditions and the code each stands for
    spelled_out = {
        "GOOD": "G",
        "INCOMPLETE": "I",
        "DAMAGED": "D",
        "ALTERNATE": "L",
        "LOCATION": "L",
    }
    return spelled_out.get(shouted)
def reconcile_school_by_code(token):
# Try to resolve an all-digit token of length 3 or 4 to a school through
# School.closest_by_code(token). Depending on the branch taken, the result is
# element [1] of the only candidate tuple, the candidate list itself, or None.
# The caller describes candidate tuples as
# ('token', school_obj, lev_edit_int, dl_edit_int, jw_float); the test below
# (both edit counts 0 and the float score 1.0) presumably means "exact match"
# -- confirm against School.closest_by_code.
# NOTE(review): this listing has lost its indentation. There are four `if`s
# but only three `else`s, so it cannot be told from the text whether
# `return possible_by_code` belongs to the length-1 check or to the
# exact-match check; the two readings return different values. Confirm
# against the original file before changing this function.
if token.isdigit():
# using 3 or 4 for length, because here is the distribution of len(code):
# len(code) | frequency out of 5572 schools
# 1 | 18
# 2 | 159
# 3 | 240
# 4 | 5131
# 5 | 24
if len(token) in [3, 4]:
possible_by_code = School.closest_by_code(token)
if len(possible_by_code) == 1:
# sole candidate: indices 2 and 3 are the edit counts, index 4 the float score
if possible_by_code[0][2] == 0 and possible_by_code[0][3] == 0 and possible_by_code[0][4] == 1.0:
# index 1 of the candidate tuple is the school object
return possible_by_code[0][1]
else:
return possible_by_code
else:
return None
else:
return None
def reconcile_school_by_spelling(token):
# Try to resolve a school name to a school through
# School.closest_by_spelling(token). Depending on the branch taken, the
# result is element [1] of the only candidate tuple, the candidate list
# itself, or (by falling off the end) None.
# Candidate tuples appear to have the same layout as in
# reconcile_school_by_code: indices 2 and 3 are compared with 0 and index 4
# with 1.0, presumably meaning "exact match" -- confirm.
# NOTE(review): indentation was lost in this listing, so it is not possible
# to tell whether the `else` pairs with the length-1 check or with the
# exact-match check; the two readings return different values. Confirm
# against the original file.
possible_by_name = School.closest_by_spelling(token)
if len(possible_by_name) == 1:
if possible_by_name[0][2] == 0 and possible_by_name[0][3] == 0 and possible_by_name[0][4] == 1.0:
# index 1 of the candidate tuple is the school object
return possible_by_name[0][1]
else:
return possible_by_name
def go():
# Re-process the logged incoming messages: optionally reset derived data,
# de-duplicate the messages by text, tokenise each text, try to extract a
# delivery condition and a school (by code and/or by name), store the outcome
# on a Confirmation and, when a school is identified, create the related
# Cargo / ShipmentSighting / ShipmentRoute records and update the school's
# status. Takes no arguments; progress is reported with print statements.
# NOTE(review): this listing has lost its leading whitespace, so the nesting
# of the statements below cannot be read off the text. Restore the original
# indentation before running or refactoring. The code is Python 2 (print
# statements, `except Exception, e`, generator `.next()`).
print datetime.datetime.now().isoformat()
# hard-coded switch: the reset below is requested on every run
clean_db = True
if clean_db:
# the return values of update()/delete() are bound to names that are never read
schools = School.objects.all().update(status=0)
print "reset schools"
districts = District.objects.all().update(status=None)
print "reset districts"
# NOTE(review): two separate updates over every shipment; a single
# update(status='P', actual_delivery_time=None) would do the same
shipments = Shipment.objects.all().update(status='P')
shipments = Shipment.objects.all().update(actual_delivery_time=None)
print "reset shipments"
confirmations = Confirmation.objects.all().delete()
print "deleted confirmations"
sightings = ShipmentSighting.objects.all().delete()
print "deleted sightings"
routes = ShipmentRoute.objects.all().delete()
print "deleted routes"
cargos = Cargo.objects.all().delete()
print "deleted cargos"
# only messages whose direction is 'I' are considered
incoming = Message.objects.filter(direction='I')
unique_text = []
unique = []
# make list of unique incoming messages, based on the message text
# (the first message seen with a given text is the one kept; membership is
# tested against a list, so this is quadratic in the number of messages)
for mess in incoming:
if mess.text not in unique_text:
unique_text.append(mess.text)
unique.append(mess)
# all school names, split into individual words,
# flattened into 1-d list, duplicates removed
school_name_words = list(set(list(itertools.chain.from_iterable([n.split() for n in School.objects.all().values_list('name', flat=True)]))))
# odd punctuation we want to get rid of
# (the backtick is listed twice)
junk = ['.', ',', '\'', '\"', '`', '(', ')', ':', ';', '&', '?', '!', '~', '`', '+', '-']
school_name_words_no_punc = []
# NOTE(review): this appends one entry per (mark, word) pair, each with only
# that single mark replaced by a space. The list therefore holds
# len(junk) * len(school_name_words) entries, and a word containing two
# different marks never appears with both removed.
for mark in junk:
for word in school_name_words:
# remove punctuation from school name words, because we'll be
# removing the same punctuation from message text
school_name_words_no_punc.append(word.replace(mark, " "))
# if user has spelled out any of the conditions, we want to see those,
# as well as "L" -- other conditions "G", "D", "I" already appear in school_name_words_no_punc
other_words = ["INCOMPLETE", "GOOD", "DAMAGED", "ALTERNATE", "LOCATION", "L"]
# ok_words is a plain list; it is searched once per word of every message below
ok_words = school_name_words_no_punc + other_words
print len(unique)
# counter: messages processed so far; matches: messages resolved to a school
counter = 0
matches = 0
#for text in ['CONFIRM BOOKS DADATA PRIMARY 1196i']:
#for msg in unique[45:55]:
for msg in unique:
counter = counter + 1
if counter % 100 == 0:
print "loop: %s" % str(counter)
text = msg.text
text_list = []
# replace any creative punctuation with spaces
for mark in junk:
text = text.replace(mark, " ")
# split the text into chunks around spaces
# (splitting on a single space yields empty blobs for runs of spaces)
blobs = text.split(" ")
for blob in blobs:
clean_blob = blob
try:
if blob[-1:].isalpha() and blob[:-1].isdigit():
# if theres somthing like '1234g'
# add as two separate blobs: '1234' and 'g'
text_list.append(blob[:-1])
text_list.append(blob[-1:])
# and move on to next blob before
# letters_for_numbers might duplicate it incorrectly
continue
# NOTE(review): slicing never raises IndexError, so this handler looks unreachable
except IndexError:
pass
for n in range(3):
# clean up blobs only if they have a digit in the first few
# characters -- so we don't clean up things like user1
try:
if blob[n].isdigit():
# letters_for_numbers also lower-cases the whole blob
clean_blob = letters_for_numbers(blob)
break
except IndexError:
# if the blob doesnt have the first few characters,
# and there is no digit yet, move on
break
# add the cleaned blob (or untouched blob) to a running list
text_list.append(clean_blob)
relevant = []
# now, loop through cleaned words and keep relevant ones
for word in text_list:
if word.isdigit():
relevant.append(word)
continue
# the word is upper-cased before the lookup; presumably school names are
# stored in upper case -- verify, otherwise name words never match
if word.upper() in ok_words:
relevant.append(word)
continue
# attach list of relevant bits to message
confirmation = Confirmation(message=msg)
# a copy is stored because the generator created below pops items off `relevant`
confirmation.token_list = copy.copy(relevant)
confirmation.save()
# now try to make sense of these tokens
consumed = []
unconsumed = []
# generator to yield relevant items in reverse order
consume = consume_in_reverse(relevant)
condition = None
school = None
school_by_code = None
school_by_spelling = None
try:
# The helper below reads `consume`, `consumed`, `unconsumed` and
# `confirmation` from the enclosing scope. `condition` and `school_by_code`
# are passed in and returned because the helper rebinds them.
# consume.next() raises StopIteration once `relevant` has been emptied.
def attempt_consumption_of_condition_and_code(condition, school_by_code):
token = consume.next()
if condition is None:
condition = reconcile_condition(token)
if condition is not None:
consumed.append(token)
else:
if token not in unconsumed:
unconsumed.append(token)
# if the last token (the first we have examined) this time
# has been consumed, pop the next-to-last token.
# otherwise, we will continue with the last token
if token in consumed:
token = consume.next()
# note this may be a school object or a list of tuples
# in the format:
# ('token', school_obj, lev_edit_int, dl_edit_int, jw_float)
if school_by_code is None:
school_by_code = reconcile_school_by_code(token)
if school_by_code is not None:
consumed.append(token)
confirmation.code = token
confirmation.save()
else:
if token not in unconsumed:
unconsumed.append(token)
# done once exactly two tokens have been consumed; otherwise recurse,
# which pulls further tokens from the generator
if len(consumed) == 2:
return condition, school_by_code
else:
return attempt_consumption_of_condition_and_code(condition, school_by_code)
# recursively consume tokens until we have something for condition and school_by_code
condition, school_by_code = attempt_consumption_of_condition_and_code(condition, school_by_code)
if not isinstance(school_by_code, list):
# woo! we have a condition and a single school, this is probably
# enough to be sure about the school, so save it as school before
# exploding into finding the school name
school = school_by_code
confirmation.school = school
confirmation.save()
try:
school_name = None
# pop the next-to-next-to-last token
token = consume.next()
# now lets try to get the school name
if token in consumed:
token = consume.next()
school_name = token
consumed.append(token)
# consume up to five additional tokens and
# prepend to school_name
# NOTE(review): the five stanzas below are identical and could be a loop
token = consume.next()
if token.isalpha():
school_name = token + " " + school_name
consumed.append(token)
else:
unconsumed.append(token)
token = consume.next()
if token.isalpha():
school_name = token + " " + school_name
consumed.append(token)
else:
unconsumed.append(token)
token = consume.next()
if token.isalpha():
school_name = token + " " + school_name
consumed.append(token)
else:
unconsumed.append(token)
token = consume.next()
if token.isalpha():
school_name = token + " " + school_name
consumed.append(token)
else:
unconsumed.append(token)
token = consume.next()
if token.isalpha():
school_name = token + " " + school_name
consumed.append(token)
else:
unconsumed.append(token)
# raised by consume.next() when the tokens run out while the name is being built
# NOTE(review): which of the following statements sit inside this handler
# depends on the lost indentation -- confirm against the original file
except StopIteration:
if school_name is not None:
school_by_spelling = reconcile_school_by_spelling(school_name.strip())
# p_schools collects candidate schools when there is no sure match
p_schools = []
if isinstance(school_by_code, list):
# index 1 of each candidate tuple is the school object
for s in (t[1] for t in school_by_code):
if s not in p_schools:
p_schools.append(s)
if school_by_spelling is not None:
if not isinstance(school_by_spelling, list):
if school is not None:
if school.code == school_by_spelling.code:
pass
else:
p_schools.append(school_by_spelling)
else:
school = school_by_spelling
else:
for s in (t[1] for t in school_by_spelling):
if s is not None:
if s.code not in [p.code for p in p_schools if p is not None]:
p_schools.append(s)
else:
school = s
# if we have no sure match, and a list of possible schools
# returned by reconcile_school_by_spelling, try toggling
# the word primary
if school is None and isinstance(school_by_spelling, list):
uschool_name = school_name.upper()
if uschool_name.find("PRIMARY") != -1:
edited_name = uschool_name.replace("PRIMARY", "")
else:
edited_name = uschool_name + " PRIMARY"
school_by_spelling = reconcile_school_by_spelling(edited_name.strip())
if school_by_spelling is not None:
if not isinstance(school_by_spelling, list):
if school is not None:
if school.code == school_by_spelling.code:
pass
else:
p_schools.append(school_by_spelling)
else:
school = school_by_spelling
else:
# NOTE(review): unlike the first pass above, this loop does not guard
# against None entries in the candidates or in p_schools
for s in (t[1] for t in school_by_spelling):
if s.code not in [p.code for p in p_schools]:
p_schools.append(s)
else:
school = s
if school is None:
# no sure match: record the primary keys of the candidate schools
confirmation.possible_schools = [s.pk for s in p_schools]
confirmation.save()
if condition is not None:
confirmation.condition = condition
confirmation.save()
if school is not None:
if condition is not None:
confirmation.condition = condition
confirmation.valid = True
confirmation.save()
matches = matches + 1
if matches % 20 == 0:
print "MATCHES: %s out of %s" % (str(matches), str(counter))
print datetime.datetime.now().isoformat()
# the commodity is looked up by slug prefix; get() raises if zero or several rows match
commodity = Commodity.objects.get(slug__istartswith="textbooks")
facility, f_created = Facility.objects.get_or_create(location_id=school.pk,\
location_type=ContentType.objects.get(model='school'))
if facility is not None:
# NOTE(review): active_shipment is used below without a None check --
# confirm that get_active_shipment always returns a shipment
active_shipment = Facility.get_active_shipment(facility)
observed_cargo = Cargo.objects.create(\
commodity=commodity,\
condition=condition)
seen_by_str = msg.connection.backend.name + ":" + msg.connection.identity
# create a new ShipmentSighting
sighting = ShipmentSighting.objects.create(\
observed_cargo=observed_cargo,\
facility=facility, seen_by=seen_by_str)
# associate new Cargo with Shipment
# the shipment's status is set to 'D' and its delivery time to the message date
active_shipment.status = 'D'
active_shipment.actual_delivery_time=msg.date
active_shipment.cargos.add(observed_cargo)
active_shipment.save()
# get or create a ShipmentRoute and associate
# with new ShipmentSighting
route, new_route = ShipmentRoute.objects.get_or_create(\
shipment=active_shipment)
route.sightings.add(sighting)
route.save()
if observed_cargo.condition is not None:
this_school = School.objects.get(pk=facility.location_id)
# map reported condition to the status numbers
# that the sparklines will use
# (the local name `map` shadows the builtin)
map = {'G':1, 'D':-2, 'L':-3, 'I':-4}
if observed_cargo.condition in ['D', 'L', 'I', 'G']:
this_school.status = map[observed_cargo.condition]
else:
# any other condition value is stored as status 0
this_school.status = 0
this_school.save()
# this_district is only referenced by the commented-out line below
this_district = this_school.parent
# TODO optimize! this is very expensive
# and way too slow
# re-generate the list of statuses that
# the sparklines will use
#updated = this_district.spark
campaign = Campaign.get_active_campaign()
if campaign is not None:
campaign.shipments.add(active_shipment)
campaign.save()
# the block below is a bare string literal, i.e. disabled code that is never executed
'''
data = [
"of %s" % (commodity.slug or "??"),
"to %s" % (facility.location.name or "??"),
"in %s condition" % (observed_cargo.get_condition_display() or "??")
]
confirmation = "Thanks. Confirmed delivery of %s." %\
(" ".join(data))
print seen_by_str + " " + confirmation
'''
# tokens ran out before a condition and a code were both found: move on to the next message
except StopIteration:
continue
# any other error: print it with the running counters, then stop in the
# ipdb debugger (requires ipdb; blocks an unattended run)
except Exception, e:
print e
print counter
print matches
import ipdb;ipdb.set_trace()
print "MATCHES: %s" % str(matches)
districts = District.objects.all()
print "%s districts" % str(districts.count())
for d in districts:
print "updating sparks for '%s'" % d.name
# bare attribute access with the result discarded; presumably `spark` is a
# property whose evaluation refreshes the district's sparkline data -- confirm
d.spark
'''
1587 total incoming messages
746 unique phone numbers
1046 unique incoming messages (541 duplicates 34%)
757 parsed successfully
47.7% of total (1587)
72.4% of uniques (1046)
363 unique schools
We Maereka P.school are thanking you forUNICEF tx bks but we didnt receive any for Gd 3.
Kubatana primary under Mazowe district didnt receive textbook we dont know why we a trying to cal yr office bt no answer may u pliz help us
My unicef textbooks were left 58km away. Why? Who should foot the bill now? M. Nyathi (Buda primary)
Confirm books GWANGWALIBA PRIMARY SCHOOL- (G). Thank you!
HLATSHWAYO SCHOOL THANKS U 4 DONATION OF TXBOOKS.OUR SCHOOL CODE II36, NUMBER OFBKS I64O,GOOD CONDITION
'''
def stats():
# Print per-province totals of district status values. For every province,
# the districts whose parent has the same name are scanned, and the
# occurrences of each status key in district.status_as_list are added to a
# running tally that is then printed.
# NOTE(review): this listing has lost its indentation, so the exact nesting
# of the print statements relative to the loops cannot be read off the text
# -- confirm against the original file. The code is Python 2.
# status key -> human-readable label (the local name `map` shadows the builtin)
map = {'1':'good', '-2':'damaged', '-3': 'alternate location', '-4':'incomplete', '0':'unknown'}
for province in Province.objects.all():
# fresh tally for this province, keyed by the labels above
prov_stats = {'good' : 0,'damaged' : 0,'alternate location': 0,'incomplete' : 0,'unknown' : 0}
# print province.name + ' districts:'
# every district is fetched for every province and matched by comparing names in Python
for district in District.objects.all():
if district.parent.name == province.name:
# print district.name
# the keys counted below are strings, so status_as_list presumably holds
# status values as strings -- TODO confirm
status_list = district.status_as_list
for n in ['-4', '-3', '-2', '1', '0']:
stat_count = status_list.count(n)
# print str(stat_count) + ' ' + map[n]
parent_count = prov_stats[map[n]]
prov_stats[map[n]] = parent_count + int(stat_count)
print province.name + ' totals:'
for k,v in prov_stats.iteritems():
print str(v) + ' ' + k
print ' '