def test_contact_ward_field(self):
    """Ward resolution yields '' when the parent district is invalid, and the
    ward counter tracks reporters created with a valid ward."""
    remote = TembaContact.create(
        uuid='C-0011', name="Jan", urns=['tel:123'], groups=['G-001', 'G-007'],
        fields={'registration_date': '2014-01-02T03:04:05.000000Z', 'state': 'Lagos',
                'lga': '', 'ward': 'Ikeja', 'occupation': 'Student', 'born': '1990',
                'gender': 'Male'},
        language='eng')
    kwargs = Contact.kwargs_from_temba(self.nigeria, remote)

    # invalid parent boundary (district) will yield empty ward
    self.assertEqual(
        kwargs,
        dict(uuid='C-0011', org=self.nigeria, gender='M', born=1990, occupation='Student',
             registered_on=json_date_to_datetime('2014-01-02T03:04:05.000'),
             state='R-LAGOS', district='', ward=''))

    self.assertEqual(ReportersCounter.get_counts(self.nigeria), dict())

    Contact.objects.create(
        uuid='C-007', org=self.nigeria, gender='M', born=1990, occupation='Student',
        registered_on=json_date_to_datetime('2014-01-02T03:04:05.000'),
        state='R-LAGOS', district='R-OYO', ward='R-IKEJA')
    counts = ReportersCounter.get_counts(self.nigeria)
    self.assertEqual(counts['ward:R-IKEJA'], 1)

    Contact.objects.create(
        uuid='C-008', org=self.nigeria, gender='M', born=1980, occupation='Teacher',
        registered_on=json_date_to_datetime('2014-01-02T03:07:05.000'),
        state='R-LAGOS', district='R-OYO', ward='R-IKEJA')
    counts = ReportersCounter.get_counts(self.nigeria)
    self.assertEqual(counts['ward:R-IKEJA'], 2)

    Contact.objects.all().delete()
def test_contact_ward_field(self):
    """Ward counters start empty and increment once per reporter in that ward."""
    self.assertEqual(ReportersCounter.get_counts(self.nigeria), dict())

    Contact.objects.create(
        uuid="C-007", org=self.nigeria, gender="M", born=1990, occupation="Student",
        registered_on=json_date_to_datetime("2014-01-02T03:04:05.000"),
        state="R-LAGOS", district="R-OYO", ward="R-IKEJA")
    counts = ReportersCounter.get_counts(self.nigeria)
    self.assertEqual(counts["ward:R-IKEJA"], 1)

    Contact.objects.create(
        uuid="C-008", org=self.nigeria, gender="M", born=1980, occupation="Teacher",
        registered_on=json_date_to_datetime("2014-01-02T03:07:05.000"),
        state="R-LAGOS", district="R-OYO", ward="R-IKEJA")
    counts = ReportersCounter.get_counts(self.nigeria)
    self.assertEqual(counts["ward:R-IKEJA"], 2)

    Contact.objects.all().delete()
def test_contact_ward_field(self):
    """Each reporter created in a ward bumps that ward's counter by one."""
    self.assertEqual(ReportersCounter.get_counts(self.nigeria), dict())

    reporters = [
        ('C-007', 1990, 'Student', '2014-01-02T03:04:05.000'),
        ('C-008', 1980, 'Teacher', '2014-01-02T03:07:05.000'),
    ]
    for expected_total, (uuid, born, occupation, registered) in enumerate(reporters, start=1):
        Contact.objects.create(
            uuid=uuid, org=self.nigeria, gender='M', born=born, occupation=occupation,
            registered_on=json_date_to_datetime(registered),
            state='R-LAGOS', district='R-OYO', ward='R-IKEJA')
        counts = ReportersCounter.get_counts(self.nigeria)
        self.assertEqual(counts['ward:R-IKEJA'], expected_total)

    Contact.objects.all().delete()
def test_reporters_counter(self):
    """get_counts aggregates per-field counter keys across created reporters."""
    self.assertEqual(ReportersCounter.get_counts(self.nigeria), dict())

    Contact.objects.create(
        uuid='C-007', org=self.nigeria, gender='M', born=1990, occupation='Student',
        registered_on=json_date_to_datetime('2014-01-02T03:04:05.000'),
        state='R-LAGOS', district='R-OYO')

    self.assertEqual(
        ReportersCounter.get_counts(self.nigeria),
        {
            'total-reporters': 1,
            'gender:m': 1,
            'occupation:student': 1,
            'born:1990': 1,
            'registered_on:2014-01-02': 1,
            'state:R-LAGOS': 1,
            'district:R-OYO': 1,
        })

    Contact.objects.create(
        uuid='C-008', org=self.nigeria, gender='M', born=1980, occupation='Teacher',
        registered_on=json_date_to_datetime('2014-01-02T03:07:05.000'),
        state='R-LAGOS', district='R-OYO')

    self.assertEqual(
        ReportersCounter.get_counts(self.nigeria),
        {
            'total-reporters': 2,
            'gender:m': 2,
            'occupation:student': 1,
            'occupation:teacher': 1,
            'born:1990': 1,
            'born:1980': 1,
            'registered_on:2014-01-02': 2,
            'state:R-LAGOS': 2,
            'district:R-OYO': 2,
        })
def test_datetime_to_json_date(self):
    """Round-trip between aware datetimes and JSON date strings, normalising to UTC."""
    utc_dt = datetime(2014, 1, 2, 3, 4, 5, tzinfo=pytz.utc)
    self.assertEqual(datetime_to_json_date(utc_dt), '2014-01-02T03:04:05.000Z')
    # both the 'Z'-suffixed and the naive form parse back to the same aware datetime
    self.assertEqual(json_date_to_datetime('2014-01-02T03:04:05.000Z'), utc_dt)
    self.assertEqual(json_date_to_datetime('2014-01-02T03:04:05.000'), utc_dt)

    kigali = pytz.timezone("Africa/Kigali")
    local_dt = kigali.localize(datetime(2014, 1, 2, 3, 4, 5))
    # serialisation always emits the UTC equivalent
    self.assertEqual(datetime_to_json_date(local_dt), '2014-01-02T01:04:05.000Z')
    self.assertEqual(json_date_to_datetime('2014-01-02T01:04:05.000Z'), local_dt.astimezone(pytz.utc))
    self.assertEqual(json_date_to_datetime('2014-01-02T01:04:05.000'), local_dt.astimezone(pytz.utc))
def test_reporters_counter(self):
    """Counters aggregate per-field totals and support filtering by key list."""
    self.assertEqual(ReportersCounter.get_counts(self.nigeria), dict())

    Contact.objects.create(
        uuid='C-007', org=self.nigeria, gender='M', born=1990, occupation='Student',
        registered_on=json_date_to_datetime('2014-01-02T03:04:05.000'),
        state='R-LAGOS', district='R-OYO')

    self.assertEqual(
        ReportersCounter.get_counts(self.nigeria),
        {
            'total-reporters': 1,
            'gender:m': 1,
            'occupation:student': 1,
            'born:1990': 1,
            'registered_on:2014-01-02': 1,
            'state:R-LAGOS': 1,
            'district:R-OYO': 1,
        })

    Contact.objects.create(
        uuid='C-008', org=self.nigeria, gender='M', born=1980, occupation='Teacher',
        registered_on=json_date_to_datetime('2014-01-02T03:07:05.000'),
        state='R-LAGOS', district='R-OYO')

    self.assertEqual(
        ReportersCounter.get_counts(self.nigeria),
        {
            'total-reporters': 2,
            'gender:m': 2,
            'occupation:student': 1,
            'occupation:teacher': 1,
            'born:1990': 1,
            'born:1980': 1,
            'registered_on:2014-01-02': 2,
            'state:R-LAGOS': 2,
            'district:R-OYO': 2,
        })

    # asking for a subset of keys returns only those counters
    self.assertEqual(
        ReportersCounter.get_counts(self.nigeria, ['total-reporters', 'gender:m']),
        {'total-reporters': 2, 'gender:m': 2})
def test_reporters_counter(self):
    """Totals accumulate per counter key; a key filter restricts the result."""
    self.assertEqual(ReportersCounter.get_counts(self.nigeria), dict())

    first = dict(
        uuid="C-007", org=self.nigeria, gender="M", born=1990, occupation="Student",
        registered_on=json_date_to_datetime("2014-01-02T03:04:05.000"),
        state="R-LAGOS", district="R-OYO")
    Contact.objects.create(**first)

    self.assertEqual(
        ReportersCounter.get_counts(self.nigeria),
        {
            "total-reporters": 1,
            "gender:m": 1,
            "occupation:student": 1,
            "born:1990": 1,
            "registered_on:2014-01-02": 1,
            "state:R-LAGOS": 1,
            "district:R-OYO": 1,
        })

    second = dict(
        uuid="C-008", org=self.nigeria, gender="M", born=1980, occupation="Teacher",
        registered_on=json_date_to_datetime("2014-01-02T03:07:05.000"),
        state="R-LAGOS", district="R-OYO")
    Contact.objects.create(**second)

    self.assertEqual(
        ReportersCounter.get_counts(self.nigeria),
        {
            "total-reporters": 2,
            "gender:m": 2,
            "occupation:student": 1,
            "occupation:teacher": 1,
            "born:1990": 1,
            "born:1980": 1,
            "registered_on:2014-01-02": 2,
            "state:R-LAGOS": 2,
            "district:R-OYO": 2,
        })

    self.assertEqual(
        ReportersCounter.get_counts(self.nigeria, ["total-reporters", "gender:m"]),
        {"total-reporters": 2, "gender:m": 2})
def test_kwargs_from_temba(self):
    """kwargs_from_temba maps remote contact fields onto Contact kwargs;
    missing fields become empty defaults and unknown boundaries become ''."""
    # all fields None -> zero/empty defaults
    remote = TembaContact.create(
        uuid='C-006', name="Jan", urns=['tel:123'], groups=['G-001', 'G-007'],
        fields={'registration_date': None, 'state': None, 'lga': None,
                'occupation': None, 'born': None, 'gender': None},
        language='eng')
    kwargs = Contact.kwargs_from_temba(self.nigeria, remote)
    self.assertEqual(
        kwargs,
        dict(uuid='C-006', org=self.nigeria, gender='', born=0, occupation='',
             registered_on=None, state='', district='', ward=''))
    # the kwargs must be usable to create a contact
    Contact.objects.create(**kwargs)

    # Invalid boundaries become ''
    remote = TembaContact.create(
        uuid='C-007', name="Jan", urns=['tel:123'], groups=['G-001', 'G-007'],
        fields={'registration_date': '2014-01-02T03:04:05.000000Z', 'state': 'Kigali',
                'lga': 'Oyo', 'occupation': 'Student', 'born': '1990', 'gender': 'Male'},
        language='eng')
    kwargs = Contact.kwargs_from_temba(self.nigeria, remote)
    self.assertEqual(
        kwargs,
        dict(uuid='C-007', org=self.nigeria, gender='M', born=1990, occupation='Student',
             registered_on=json_date_to_datetime('2014-01-02T03:04:05.000'),
             state='', district='', ward=''))
    Contact.objects.create(**kwargs)

    # valid state/lga/ward names resolve to boundary osm ids
    remote = TembaContact.create(
        uuid='C-008', name="Jan", urns=['tel:123'], groups=['G-001', 'G-007'],
        fields={'registration_date': '2014-01-02T03:04:05.000000Z', 'state': 'Lagos',
                'lga': 'Oyo', 'ward': 'Ikeja', 'occupation': 'Student', 'born': '1990',
                'gender': 'Male'},
        language='eng')
    kwargs = Contact.kwargs_from_temba(self.nigeria, remote)
    self.assertEqual(
        kwargs,
        dict(uuid='C-008', org=self.nigeria, gender='M', born=1990, occupation='Student',
             registered_on=json_date_to_datetime('2014-01-02T03:04:05.000'),
             state='R-LAGOS', district='R-OYO', ward='R-IKEJA'))
    Contact.objects.create(**kwargs)
def populate_poll_poll_date(apps, schema_editor):
    """Backfill Poll.poll_date from the matching flow's created_on, falling
    back to the poll's own created_on when the flow is unknown."""
    Poll = apps.get_model("polls", "Poll")
    Org = apps.get_model("orgs", "Org")

    user_agent = getattr(settings, "SITE_API_USER_AGENT", None)
    api_host = settings.SITE_API_HOST

    for org in Org.objects.all():
        client = TembaClient(api_host, org.api_token, user_agent=user_agent)
        # map flow uuid -> serialised created_on for this org's flows
        flows_date = {flow.uuid: datetime_to_json_date(flow.created_on)
                      for flow in client.get_flows()}

        for poll in Poll.objects.filter(org=org):
            json_date = flows_date.get(poll.flow_uuid, None)
            if json_date:
                poll.poll_date = json_date_to_datetime(json_date)
            else:
                logger.info(
                    "using created_on for flow_date on poll with id %s" % poll.pk)
                poll.poll_date = poll.created_on
            poll.save()
def pre_save(self, obj):
    """Attach the request org, reuse a recent duplicate poll if one exists,
    and derive poll_date from the flow's created_on (else now)."""
    obj = super(PollCRUDL.PollFlow, self).pre_save(obj)
    obj.org = self.request.org

    # a matching active poll created in the last five minutes is treated as
    # the same poll, to avoid duplicates from double submissions
    five_minutes_ago = timezone.now() - timedelta(minutes=5)
    similar_poll = Poll.objects.filter(
        org=obj.org,
        flow_uuid=obj.flow_uuid,
        backend=obj.backend,
        is_active=True,
        created_on__gte=five_minutes_ago,
    ).first()
    if similar_poll:
        obj = similar_poll

    created_on = obj.get_flow().get("created_on", None)
    obj.poll_date = json_date_to_datetime(created_on) if created_on else timezone.now()
    return obj
def test_fetch_flows(self, mock_get_flows):
    """fetch_flows returns a dict keyed by flow uuid and caches the result
    together with the fetch timestamp."""
    mock_get_flows.side_effect = [
        MockClientQuery([
            Flow.create(
                name='Flow 1', uuid='uuid-25', labels=[], archived=False, expires=720,
                created_on=json_date_to_datetime("2015-04-08T12:48:44.320Z"),
                runs=Flow.Runs.create(completed=120, active=50, expired=100, interrupted=30))
        ])]

    with patch("ureport.utils.datetime_to_ms") as mock_datetime_ms:
        mock_datetime_ms.return_value = 500
        with patch('django.core.cache.cache.set') as cache_set_mock:
            flows = fetch_flows(self.org, self.rapidpro_backend)

            expected = {
                'uuid-25': dict(uuid='uuid-25', date_hint="2015-04-08",
                                created_on="2015-04-08T12:48:44.320Z", name="Flow 1",
                                runs=300, completed_runs=120, archived=False)}
            self.assertEqual(flows, expected)

            # results are cached per org+backend with the mocked timestamp
            cache_set_mock.assert_called_once_with(
                'org:%d:backend:%s:flows' % (self.org.pk, self.rapidpro_backend.slug),
                dict(time=500, results=expected),
                UREPORT_ASYNC_FETCHED_DATA_CACHE_TIME)
def test_tasks(self):
    """fetch_contacts_task behaviour: a fetch_all run refreshes boundaries and
    contact fields as well as contacts; an incremental run only fetches
    contacts modified after the cached last-fetched time."""
    with self.settings(CACHES={'default': {'BACKEND': 'redis_cache.cache.RedisCache',
                                           'LOCATION': '127.0.0.1:6379:1',
                                           'OPTIONS': {'CLIENT_CLASS': 'redis_cache.client.DefaultClient'}
                                           }}):
        with patch('ureport.contacts.tasks.Contact.fetch_contacts') as mock_fetch_contacts:
            with patch('ureport.contacts.tasks.Boundary.fetch_boundaries') as mock_fetch_boundaries:
                with patch('ureport.contacts.tasks.ContactField.fetch_contact_fields') as mock_fetch_contact_fields:
                    mock_fetch_contacts.return_value = 'FETCHED'
                    mock_fetch_boundaries.return_value = 'FETCHED'
                    mock_fetch_contact_fields.return_value = 'FETCHED'

                    # full fetch (fetch_all=True): contacts fetched from scratch
                    # and boundaries/fields refreshed too
                    fetch_contacts_task(self.nigeria.pk, True)
                    mock_fetch_contacts.assert_called_once_with(self.nigeria, after=None)
                    mock_fetch_boundaries.assert_called_with(self.nigeria)
                    mock_fetch_contact_fields.assert_called_with(self.nigeria)
                    self.assertEqual(mock_fetch_boundaries.call_count, 2)
                    self.assertEqual(mock_fetch_contact_fields.call_count, 2)

                    mock_fetch_contacts.reset_mock()
                    mock_fetch_boundaries.reset_mock()
                    mock_fetch_contact_fields.reset_mock()

                    with patch('django.core.cache.cache.get') as cache_get_mock:
                        date_str = '2014-01-02T01:04:05.000Z'
                        d1 = json_date_to_datetime(date_str)

                        # cached last-fetched time -> incremental fetch only
                        cache_get_mock.return_value = date_str

                        fetch_contacts_task(self.nigeria.pk)
                        mock_fetch_contacts.assert_called_once_with(self.nigeria, after=d1)
                        self.assertFalse(mock_fetch_boundaries.called)
                        self.assertFalse(mock_fetch_contact_fields.called)
def test_fetch_flows(self, mock_get_flows):
    """fetch_flows serialises each flow (including its result metadata) into a
    dict keyed by uuid and caches the payload with the fetch timestamp."""
    mock_get_flows.side_effect = [
        MockClientQuery(
            [
                Flow.create(
                    name="Flow 1",
                    uuid="uuid-25",
                    labels=[],
                    archived=False,
                    expires=720,
                    created_on=json_date_to_datetime("2015-04-08T12:48:44.320Z"),
                    results=[
                        Flow.FlowResult.create(
                            key="color",
                            name="Color",
                            categories=["Orange", "Blue", "Other", "Nothing"],
                            node_uuids=["42a8e177-9e88-429b-b70a-7d4854423092"],
                        )
                    ],
                    runs=Flow.Runs.create(completed=120, active=50, expired=100, interrupted=30),
                )
            ]
        )
    ]

    with patch("ureport.utils.datetime_to_ms") as mock_datetime_ms:
        mock_datetime_ms.return_value = 500
        with patch("django.core.cache.cache.set") as cache_set_mock:
            flows = fetch_flows(self.org, self.rapidpro_backend)

            # runs=300 is the sum of completed+active+expired+interrupted
            expected = dict()
            expected["uuid-25"] = dict(
                uuid="uuid-25",
                date_hint="2015-04-08",
                created_on="2015-04-08T12:48:44.320Z",
                name="Flow 1",
                runs=300,
                completed_runs=120,
                archived=False,
                results=[
                    dict(
                        key="color",
                        name="Color",
                        categories=["Orange", "Blue", "Other", "Nothing"],
                        node_uuids=["42a8e177-9e88-429b-b70a-7d4854423092"],
                    )
                ],
            )

            self.assertEqual(flows, expected)
            # cached per org+backend with the mocked timestamp
            cache_set_mock.assert_called_once_with(
                "org:%d:backend:%s:flows" % (self.org.pk, self.rapidpro_backend.slug),
                dict(time=500, results=expected),
                UREPORT_ASYNC_FETCHED_DATA_CACHE_TIME,
            )
def get_sync_status(self, obj):
    """Return a human-readable sync status string for a poll row."""
    if not obj.has_synced:
        return "Syncing... {0:.1f}%".format(obj.get_sync_progress())

    cache_key = Poll.POLL_RESULTS_LAST_SYNC_TIME_CACHE_KEY % (obj.org.pk, obj.flow_uuid)
    last_synced = cache.get(cache_key, None)
    if last_synced:
        return "Last synced %s ago" % timesince(json_date_to_datetime(last_synced))

    # we know we synced do not check the the progress since that is slow
    return "Synced 100%"
def pre_save(self, obj):
    """Attach the request org and default poll_date to the flow's created_on
    (or now when the flow has no creation date)."""
    obj = super(PollCRUDL.Create, self).pre_save(obj)
    obj.org = self.request.org

    created_on = obj.get_flow().get('created_on', None)
    obj.poll_date = json_date_to_datetime(created_on) if created_on else timezone.now()
    return obj
def fetch_contacts(cls, org, after=None):
    """
    Fetch the org's reporter-group contacts from the remote API, paging
    backwards by modified_on, and sync each member locally.

    Args:
        org: the org whose contacts to fetch
        after: only consider contacts modified after this datetime; when None
            a full fetch from 2013-01-01 is done and the completion time is
            cached under CONTACT_LAST_FETCHED_CACHE_KEY

    Returns:
        list of synced contact UUIDs, or None when the configured reporter
        group does not exist remotely
    """
    # NOTE: converted from a Python 2 print statement; print() is valid in
    # both Python 2 and 3 for a single argument
    print("START== Fetching contacts for %s" % org.name)

    reporter_group = org.get_config('reporter_group')
    temba_client = org.get_temba_client()
    api_groups = temba_client.get_groups(name=reporter_group)

    if not api_groups:
        return

    seen_uuids = []

    # resolve the UUID of the configured reporter group (case-insensitive)
    group_uuid = None
    for grp in api_groups:
        if grp.name.lower() == reporter_group.lower():
            group_uuid = grp.uuid
            break

    now = timezone.now().replace(tzinfo=pytz.utc)
    before = now

    if not after:
        # consider the after year 2013
        after = json_date_to_datetime("2013-01-01T00:00:00.000")

    while before > after:
        pager = temba_client.pager()
        api_contacts = temba_client.get_contacts(before=before, after=after, pager=pager)

        last_contact_index = len(api_contacts) - 1

        for i, contact in enumerate(api_contacts):
            # page backwards: the last contact's modified_on bounds the next page
            if i == last_contact_index:
                before = contact.modified_on

            if group_uuid in contact.groups:
                cls.update_or_create_from_temba(org, contact)
                seen_uuids.append(contact.uuid)

        if not pager.has_more():
            # all pages consumed; remember when this fetch completed
            cache.set(cls.CONTACT_LAST_FETCHED_CACHE_KEY % org.pk,
                      datetime_to_json_date(now.replace(tzinfo=pytz.utc)),
                      cls.CONTACT_LAST_FETCHED_CACHE_TIMEOUT)
            break

    return seen_uuids
def fetch_contacts_task(org_id=None, fetch_all=False):
    """
    Fetch contacts for one org (org_id) or every active org, guarded by a
    redis lock so overlapping runs are skipped.

    Args:
        org_id: limit the run to a single org and use a shorter lock timeout
        fetch_all: ignore the cached last-fetched time and fetch everything,
            refreshing boundaries and contact fields as well
    """
    r = get_redis_connection()

    key = 'fetch_contacts'
    lock_timeout = 3600

    if org_id:
        key = 'fetch_contacts:%d' % org_id
        lock_timeout = 300

    if not r.get(key):
        with r.lock(key, timeout=lock_timeout):
            active_orgs = Org.objects.filter(is_active=True)
            if org_id:
                active_orgs = Org.objects.filter(pk=org_id)

            for org in active_orgs:
                start = time.time()

                last_fetched_key = Contact.CONTACT_LAST_FETCHED_CACHE_KEY % org.id
                after = cache.get(last_fetched_key, None)
                if after:
                    after = json_date_to_datetime(after)

                if fetch_all:
                    after = None

                try:
                    if after is None:
                        # full fetch: refresh boundaries and fields first
                        Boundary.fetch_boundaries(org)
                        ContactField.fetch_contact_fields(org)

                    Boundary.get_boundaries(org)
                    ContactField.get_contact_fields(org)

                    Contact.fetch_contacts(org, after=after)
                    # NOTE: converted from a Python 2 print statement for
                    # Python 3 compatibility (same output either way)
                    print("Task: fetch_contacts for %s took %ss" % (org.name, time.time() - start))
                except Exception as e:
                    import traceback
                    traceback.print_exc()
                    logger.exception("Error fetching contacts: %s" % str(e))
def clean(self):
    """Default poll_date to the selected flow's created_on, else now."""
    cleaned_data = self.cleaned_data
    poll_date = cleaned_data.get("poll_date")

    if not poll_date:
        flows = self.org.get_flows(self.backend)
        flow = flows.get(cleaned_data.get("flow_uuid"))
        created_on = flow.get("created_on", None) if flow else None
        if created_on:
            poll_date = json_date_to_datetime(created_on)

    cleaned_data["poll_date"] = poll_date or timezone.now()
    return cleaned_data
def clean(self):
    """Fill in poll_date from the flow's created_on when not provided, falling
    back to the current time."""
    cleaned_data = self.cleaned_data
    poll_date = cleaned_data.get('poll_date')

    if not poll_date:
        flow = self.org.get_flows().get(cleaned_data.get('flow_uuid'))
        created_on = flow.get('created_on', None) if flow else None
        if created_on:
            poll_date = json_date_to_datetime(created_on)

    cleaned_data['poll_date'] = poll_date or timezone.now()
    return cleaned_data
def test_datetime_to_json_date(self):
    """Serialisation always emits UTC with a 'Z' suffix; parsing accepts
    explicit offsets, 'Z'-suffixed, and naive strings."""
    utc_value = datetime(2014, 1, 2, 3, 4, 5, tzinfo=pytz.utc)
    self.assertEqual(datetime_to_json_date(utc_value), "2014-01-02T03:04:05.000Z")
    for raw in ("2014-01-02T03:04:05.000+00:00",
                "2014-01-02T03:04:05.000Z",
                "2014-01-02T03:04:05.000"):
        self.assertEqual(json_date_to_datetime(raw), utc_value)

    kigali = pytz.timezone("Africa/Kigali")
    local_value = kigali.localize(datetime(2014, 1, 2, 3, 4, 5))
    self.assertEqual(datetime_to_json_date(local_value), "2014-01-02T01:04:05.000Z")
    # the same instant expressed with an offset, with 'Z', and naive-UTC
    self.assertEqual(json_date_to_datetime("2014-01-02T03:04:05+02:00"), local_value)
    self.assertEqual(json_date_to_datetime("2014-01-02T01:04:05.000Z"), local_value)
    self.assertEqual(json_date_to_datetime("2014-01-02T01:04:05.000"), local_value)
def get_sync_status(self, obj):
    """
    Return a translatable, human-readable sync status for a poll.

    Fix: gettext was being called on already-interpolated strings
    (``_("... %s ago" % ...)`` and ``_(f"...")``), so the msgid could never
    match a catalog entry and the strings were effectively untranslatable.
    Translate the template first, interpolate afterwards.
    """
    if obj.has_synced:
        r = get_redis_connection()
        key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (obj.org.pk, obj.flow_uuid)
        if r.get(key):
            return _("Scheduled Sync currently in progress...")

        last_synced = cache.get(
            Poll.POLL_RESULTS_LAST_SYNC_TIME_CACHE_KEY % (obj.org.pk, obj.flow_uuid), None)
        if last_synced:
            # translate the template, then interpolate the elapsed time
            return _("Last results synced %(time)s ago") % dict(
                time=timesince(json_date_to_datetime(last_synced)))

        # we know we synced do not check the the progress since that is slow
        return _("Synced")

    sync_progress = obj.get_sync_progress()
    # avoid f-strings inside gettext: interpolate after translation
    return _("Sync currently in progress... %(progress).1f%%") % dict(progress=sync_progress)
def test_fetch_flows(self, mock_get_flows):
    """fetch_flows returns a dict keyed by flow uuid and caches it per
    org+backend together with the (mocked) fetch timestamp."""
    mock_get_flows.side_effect = [
        MockClientQuery(
            [
                Flow.create(
                    name="Flow 1",
                    uuid="uuid-25",
                    labels=[],
                    archived=False,
                    expires=720,
                    created_on=json_date_to_datetime("2015-04-08T12:48:44.320Z"),
                    runs=Flow.Runs.create(completed=120, active=50, expired=100, interrupted=30),
                )
            ]
        )
    ]

    with patch("ureport.utils.datetime_to_ms") as mock_datetime_ms:
        mock_datetime_ms.return_value = 500
        with patch("django.core.cache.cache.set") as cache_set_mock:
            flows = fetch_flows(self.org, self.rapidpro_backend)

            # runs=300 is the sum of completed+active+expired+interrupted
            expected = dict()
            expected["uuid-25"] = dict(
                uuid="uuid-25",
                date_hint="2015-04-08",
                created_on="2015-04-08T12:48:44.320Z",
                name="Flow 1",
                runs=300,
                completed_runs=120,
                archived=False,
            )

            self.assertEqual(flows, expected)
            cache_set_mock.assert_called_once_with(
                "org:%d:backend:%s:flows" % (self.org.pk, self.rapidpro_backend.slug),
                dict(time=500, results=expected),
                UREPORT_ASYNC_FETCHED_DATA_CACHE_TIME,
            )
def populate_poll_poll_date(apps, schema_editor):
    """
    Data migration: backfill Poll.poll_date from the matching flow's
    created_on, falling back to the poll's own created_on.

    Fix: the Python 2 ``print`` statement is a syntax error on Python 3;
    converted to the ``print()`` call form, which behaves identically on both.
    """
    Poll = apps.get_model('polls', "Poll")
    Org = apps.get_model('orgs', "Org")

    agent = getattr(settings, 'SITE_API_USER_AGENT', None)
    host = settings.SITE_API_HOST

    for org in Org.objects.all():
        temba_client = TembaClient(host, org.api_token, user_agent=agent)
        api_flows = temba_client.get_flows()

        # map flow uuid -> serialised created_on
        flows_date = dict()
        for flow in api_flows:
            flows_date[flow.uuid] = datetime_to_json_date(flow.created_on)

        for poll in Poll.objects.filter(org=org):
            json_date = flows_date.get(poll.flow_uuid, None)
            if json_date:
                date = json_date_to_datetime(json_date)
            else:
                print("using created_on for flow_date on poll with id %s" % poll.pk)
                date = poll.created_on

            poll.poll_date = date
            poll.save()
def pull_results(self, poll, modified_after, modified_before, progress_callback=None):
    """
    Pull run results for a poll from the RapidPro API and sync them locally.

    Guarded by a redis lock per (org, flow) so only one sync runs at a time.
    The sync is resumable: cursor/time checkpoints are cached so a run that
    hits the rate limit or the max-runs/lock-time budget pauses and can be
    resumed later. modified_after/modified_before are accepted for interface
    compatibility; the effective window comes from the cached pull params.

    Returns a 6-tuple of counters:
        (vals created, vals updated, vals ignored,
         paths created, paths updated, paths ignored)
    """
    org = poll.org
    r = get_redis_connection()
    key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.flow_uuid)

    # running totals shared with the per-run processing helpers
    stats_dict = dict(
        num_val_created=0,
        num_val_updated=0,
        num_val_ignored=0,
        num_path_created=0,
        num_path_updated=0,
        num_path_ignored=0,
        num_synced=0,
    )

    if r.get(key):
        logger.info("Skipping pulling results for poll #%d on org #%d as it is still running" % (poll.pk, org.pk))
    else:
        with r.lock(key, timeout=Poll.POLL_SYNC_LOCK_TIMEOUT):
            # stop well before the lock expires so we can checkpoint cleanly
            lock_expiration = time.time() + 0.8 * Poll.POLL_SYNC_LOCK_TIMEOUT
            client = self._get_client(org, 2)

            questions_uuids = poll.get_question_uuids()

            # ignore the TaskState time and use the time we stored in redis
            (
                after,
                before,
                latest_synced_obj_time,
                batches_latest,
                resume_cursor,
                pull_after_delete,
            ) = poll.get_pull_cached_params()

            if pull_after_delete is not None:
                # results were deleted: restart the sync from scratch
                after = None
                latest_synced_obj_time = None
                batches_latest = None
                resume_cursor = None
                poll.delete_poll_results()
                pull_refresh_from_archives.apply_async((poll.pk,), queue="sync")

            if resume_cursor is None:
                # fresh sync window: everything since the last synced object
                before = datetime_to_json_date(timezone.now())
                after = latest_synced_obj_time

            start = time.time()
            logger.info("Start fetching runs for poll #%d on org #%d" % (poll.pk, org.pk))

            poll_runs_query = client.get_runs(flow=poll.flow_uuid, after=after, before=before)
            fetches = poll_runs_query.iterfetches(retry_on_rate_exceed=True, resume_cursor=resume_cursor)

            try:
                fetch_start = time.time()
                for fetch in fetches:
                    logger.info(
                        "RapidPro API fetch for poll #%d "
                        "on org #%d %d - %d took %ds"
                        % (
                            poll.pk,
                            org.pk,
                            stats_dict["num_synced"],
                            stats_dict["num_synced"] + len(fetch),
                            time.time() - fetch_start,
                        )
                    )

                    contacts_map, poll_results_map, poll_results_to_save_map = self._initiate_lookup_maps(
                        fetch, org, poll
                    )

                    for temba_run in fetch:
                        # track the newest modified_on seen in this batch
                        if batches_latest is None or temba_run.modified_on > json_date_to_datetime(batches_latest):
                            batches_latest = datetime_to_json_date(temba_run.modified_on.replace(tzinfo=pytz.utc))

                        contact_obj = contacts_map.get(temba_run.contact.uuid, None)
                        self._process_run_poll_results(
                            org,
                            questions_uuids,
                            temba_run,
                            contact_obj,
                            poll_results_map,
                            poll_results_to_save_map,
                            stats_dict,
                        )

                    stats_dict["num_synced"] += len(fetch)
                    if progress_callback:
                        progress_callback(stats_dict["num_synced"])

                    self._save_new_poll_results_to_database(poll_results_to_save_map)

                    logger.info(
                        "Processed fetch of %d - %d "
                        "runs for poll #%d on org #%d"
                        % (stats_dict["num_synced"] - len(fetch), stats_dict["num_synced"], poll.pk, org.pk)
                    )
                    fetch_start = time.time()
                    logger.info("=" * 40)

                    # budget exhausted: checkpoint and pause so a later run resumes
                    if (
                        stats_dict["num_synced"] >= Poll.POLL_RESULTS_MAX_SYNC_RUNS
                        or time.time() > lock_expiration
                    ):
                        poll.rebuild_poll_results_counts()

                        cursor = fetches.get_cursor()
                        self._mark_poll_results_sync_paused(org, poll, cursor, after, before, batches_latest)

                        logger.info(
                            "Break pull results for poll #%d on org #%d in %ds, "
                            " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"
                            " Objects: created %d, updated %d, ignored %d. "
                            "Before cursor %s"
                            % (
                                poll.pk,
                                org.pk,
                                time.time() - start,
                                after,
                                before,
                                batches_latest,
                                latest_synced_obj_time,
                                stats_dict["num_val_created"],
                                stats_dict["num_val_updated"],
                                stats_dict["num_val_ignored"],
                                cursor,
                            )
                        )

                        return (
                            stats_dict["num_val_created"],
                            stats_dict["num_val_updated"],
                            stats_dict["num_val_ignored"],
                            stats_dict["num_path_created"],
                            stats_dict["num_path_updated"],
                            stats_dict["num_path_ignored"],
                        )

            except TembaRateExceededError:
                # rate limited: checkpoint and pause, same as the budget case
                poll.rebuild_poll_results_counts()

                cursor = fetches.get_cursor()
                self._mark_poll_results_sync_paused(org, poll, cursor, after, before, batches_latest)

                logger.info(
                    "Break pull results for poll #%d on org #%d in %ds, "
                    " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"
                    " Objects: created %d, updated %d, ignored %d. "
                    "Before cursor %s"
                    % (
                        poll.pk,
                        org.pk,
                        time.time() - start,
                        after,
                        before,
                        batches_latest,
                        latest_synced_obj_time,
                        stats_dict["num_val_created"],
                        stats_dict["num_val_updated"],
                        stats_dict["num_val_ignored"],
                        cursor,
                    )
                )

                return (
                    stats_dict["num_val_created"],
                    stats_dict["num_val_updated"],
                    stats_dict["num_val_ignored"],
                    stats_dict["num_path_created"],
                    stats_dict["num_path_updated"],
                    stats_dict["num_path_ignored"],
                )

            # advance the high-water mark to the newest object we processed
            if batches_latest is not None and (
                latest_synced_obj_time is None
                or json_date_to_datetime(latest_synced_obj_time) <= json_date_to_datetime(batches_latest)
            ):
                latest_synced_obj_time = batches_latest

            self._mark_poll_results_sync_completed(poll, org, latest_synced_obj_time)

            # from django.db import connection as db_connection, reset_queries
            # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
            # for q in slowest_queries:
            #     print "=" * 60
            #     print "\n\n\n"
            #     print "%s -- %s" % (q['time'], q['sql'])
            # reset_queries()

            logger.info(
                "Finished pulling results for poll #%d on org #%d runs in %ds, "
                "Times: sync_latest= %s,"
                "Objects: created %d, updated %d, ignored %d"
                % (
                    poll.pk,
                    org.pk,
                    time.time() - start,
                    latest_synced_obj_time,
                    stats_dict["num_val_created"],
                    stats_dict["num_val_updated"],
                    stats_dict["num_val_ignored"],
                )
            )
    return (
        stats_dict["num_val_created"],
        stats_dict["num_val_updated"],
        stats_dict["num_val_ignored"],
        stats_dict["num_path_created"],
        stats_dict["num_path_updated"],
        stats_dict["num_path_ignored"],
    )
def test_local_kwargs(self):
    """local_kwargs maps a remote contact onto local Contact kwargs: None
    fields become empty defaults, unknown boundaries become '', valid
    boundaries resolve to osm ids, and out-of-range 'born' values become 0."""
    # all fields None -> zero/empty defaults
    temba_contact = TembaContact.create(
        uuid="C-006",
        name="Jan",
        urns=["tel:123"],
        groups=[ObjectRef.create(uuid="G-001", name="ureporters"), ObjectRef.create(uuid="G-007", name="Actors")],
        fields={
            "registration_date": None,
            "state": None,
            "lga": None,
            "occupation": None,
            "born": None,
            "gender": None,
        },
        language="eng",
    )

    self.assertEqual(
        self.syncer.local_kwargs(self.nigeria, temba_contact),
        {
            "backend": self.floip_backend,
            "org": self.nigeria,
            "uuid": "C-006",
            "gender": "",
            "born": 0,
            "occupation": "",
            "registered_on": None,
            "state": "",
            "district": "",
            "ward": "",
        },
    )

    # unknown state ('Kigali' is not a Nigerian boundary) -> empty boundaries
    temba_contact = TembaContact.create(
        uuid="C-007",
        name="Jan",
        urns=["tel:123"],
        groups=[ObjectRef.create(uuid="G-001", name="ureporters"), ObjectRef.create(uuid="G-007", name="Actors")],
        fields={
            "registration_date": "2014-01-02T03:04:05.000000Z",
            "state": "Kigali",
            "lga": "Oyo",
            "occupation": "Student",
            "born": "1990",
            "gender": "Male",
        },
        language="eng",
    )

    self.assertEqual(
        self.syncer.local_kwargs(self.nigeria, temba_contact),
        {
            "backend": self.floip_backend,
            "org": self.nigeria,
            "uuid": "C-007",
            "gender": "M",
            "born": 1990,
            "occupation": "Student",
            "registered_on": json_date_to_datetime("2014-01-02T03:04:05.000"),
            "state": "",
            "district": "",
            "ward": "",
        },
    )

    # valid state/lga/ward names resolve to boundary osm ids
    temba_contact = TembaContact.create(
        uuid="C-008",
        name="Jan",
        urns=["tel:123"],
        groups=[ObjectRef.create(uuid="G-001", name="ureporters"), ObjectRef.create(uuid="G-007", name="Actors")],
        fields={
            "registration_date": "2014-01-02T03:04:05.000000Z",
            "state": "Lagos",
            "lga": "Oyo",
            "ward": "Ikeja",
            "occupation": "Student",
            "born": "1990",
            "gender": "Male",
        },
        language="eng",
    )

    self.assertEqual(
        self.syncer.local_kwargs(self.nigeria, temba_contact),
        {
            "backend": self.floip_backend,
            "org": self.nigeria,
            "uuid": "C-008",
            "gender": "M",
            "born": 1990,
            "occupation": "Student",
            "registered_on": json_date_to_datetime("2014-01-02T03:04:05.000"),
            "state": "R-LAGOS",
            "district": "R-OYO",
            "ward": "R-IKEJA",
        },
    )

    # negative 'born' is rejected and stored as 0
    temba_contact = TembaContact.create(
        uuid="C-008",
        name="Jan",
        urns=["tel:123"],
        groups=[ObjectRef.create(uuid="G-001", name="ureporters"), ObjectRef.create(uuid="G-007", name="Actors")],
        fields={
            "registration_date": "2014-01-02T03:04:05.000000Z",
            "state": "Lagos",
            "lga": "Oyo",
            "occupation": "Student",
            "born": "-1",
            "gender": "Male",
        },
        language="eng",
    )

    self.assertEqual(
        self.syncer.local_kwargs(self.nigeria, temba_contact),
        {
            "backend": self.floip_backend,
            "org": self.nigeria,
            "uuid": "C-008",
            "gender": "M",
            "born": 0,
            "occupation": "Student",
            "registered_on": json_date_to_datetime("2014-01-02T03:04:05.000"),
            "state": "R-LAGOS",
            "district": "R-OYO",
            "ward": "",
        },
    )

    # 'born' larger than a 32-bit int is rejected and stored as 0
    temba_contact = TembaContact.create(
        uuid="C-008",
        name="Jan",
        urns=["tel:123"],
        groups=[ObjectRef.create(uuid="G-001", name="ureporters"), ObjectRef.create(uuid="G-007", name="Actors")],
        fields={
            "registration_date": "2014-01-02T03:04:05.000000Z",
            "state": "Lagos",
            "lga": "Oyo",
            "occupation": "Student",
            "born": "2147483648",
            "gender": "Male",
        },
        language="eng",
    )

    self.assertEqual(
        self.syncer.local_kwargs(self.nigeria, temba_contact),
        {
            "backend": self.floip_backend,
            "org": self.nigeria,
            "uuid": "C-008",
            "gender": "M",
            "born": 0,
            "occupation": "Student",
            "registered_on": json_date_to_datetime("2014-01-02T03:04:05.000"),
            "state": "R-LAGOS",
            "district": "R-OYO",
            "ward": "",
        },
    )
def test_reporters_counter(self):
    """Counters aggregate per-field totals including the registration-week
    composite keys (registered_gender/registered_born/registered_state), and
    get_counts can be filtered to a subset of keys."""
    self.assertEqual(ReportersCounter.get_counts(self.nigeria), dict())
    Contact.objects.create(
        uuid="C-007",
        org=self.nigeria,
        gender="M",
        born=1990,
        occupation="Student",
        registered_on=json_date_to_datetime("2014-01-02T03:04:05.000"),
        state="R-LAGOS",
        district="R-OYO",
    )

    expected = dict()
    expected["total-reporters"] = 1
    expected["gender:m"] = 1
    expected["occupation:student"] = 1
    expected["born:1990"] = 1
    expected["registered_on:2014-01-02"] = 1
    # registered_* keys are bucketed by the week start (2014-01-01)
    expected["registered_gender:2014-01-01:m"] = 1
    expected["registered_born:2014-01-01:1990"] = 1
    expected["registered_state:2014-01-01:R-LAGOS"] = 1
    expected["state:R-LAGOS"] = 1
    expected["district:R-OYO"] = 1

    self.assertEqual(ReportersCounter.get_counts(self.nigeria), expected)

    Contact.objects.create(
        uuid="C-008",
        org=self.nigeria,
        gender="M",
        born=1980,
        occupation="Teacher",
        registered_on=json_date_to_datetime("2014-01-02T03:07:05.000"),
        state="R-LAGOS",
        district="R-OYO",
    )

    expected = dict()
    expected["total-reporters"] = 2
    expected["gender:m"] = 2
    expected["occupation:student"] = 1
    expected["occupation:teacher"] = 1
    expected["born:1990"] = 1
    expected["born:1980"] = 1
    expected["registered_on:2014-01-02"] = 2
    expected["registered_gender:2014-01-01:m"] = 2
    expected["registered_born:2014-01-01:1990"] = 1
    expected["registered_born:2014-01-01:1980"] = 1
    expected["registered_state:2014-01-01:R-LAGOS"] = 2
    expected["state:R-LAGOS"] = 2
    expected["district:R-OYO"] = 2

    self.assertEqual(ReportersCounter.get_counts(self.nigeria), expected)

    # filtering restricts the result to the requested counter keys
    self.assertEqual(
        ReportersCounter.get_counts(self.nigeria, ["total-reporters", "gender:m"]),
        {
            "total-reporters": 2,
            "gender:m": 2
        },
    )
def pull_results(self, poll, modified_after, modified_before, progress_callback=None):
    """
    Pull flow runs for `poll` from the RapidPro API and upsert PollResult rows.

    Sync progress (cursor, after/before window, latest batch time) is cached via
    the `_mark_poll_results_sync_paused` / `_mark_poll_results_sync_completed`
    helpers so an interrupted pull can resume later. A redis lock keyed on
    (org, flow) prevents two concurrent pulls of the same poll.

    NOTE(review): `modified_after` / `modified_before` are ignored — the window
    comes from the cached pull params instead; confirm callers expect that.

    Returns a 6-tuple of counters:
    (num_val_created, num_val_updated, num_val_ignored,
     num_path_created, num_path_updated, num_path_ignored)
    """
    org = poll.org
    r = get_redis_connection()
    key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.flow_uuid)

    # All counters live in one dict so helper methods can mutate them in place.
    stats_dict = dict(
        num_val_created=0,
        num_val_updated=0,
        num_val_ignored=0,
        num_path_created=0,
        num_path_updated=0,
        num_path_ignored=0,
        num_synced=0,
    )

    # Poll explicitly flagged to stop syncing: return all-zero counters untouched.
    if poll.stopped_syncing:
        return (
            stats_dict["num_val_created"],
            stats_dict["num_val_updated"],
            stats_dict["num_val_ignored"],
            stats_dict["num_path_created"],
            stats_dict["num_path_updated"],
            stats_dict["num_path_ignored"],
        )

    if r.get(key):
        # Another worker holds the lock; skip this run entirely.
        logger.info("Skipping pulling results for poll #%d on org #%d as it is still running" % (poll.pk, org.pk))
    else:
        with r.lock(key, timeout=Poll.POLL_SYNC_LOCK_TIMEOUT):
            # Stop fetching well before the lock would expire (80% of the timeout).
            lock_expiration = time.time() + 0.8 * Poll.POLL_SYNC_LOCK_TIMEOUT
            client = self._get_client(org, 2)

            questions_uuids = poll.get_question_uuids()

            # ignore the TaskState time and use the time we stored in redis
            (
                after,
                before,
                latest_synced_obj_time,
                batches_latest,
                resume_cursor,
                pull_after_delete,
            ) = poll.get_pull_cached_params()

            if pull_after_delete is not None:
                # Results were deleted: restart the pull from scratch and kick off
                # an async refresh from archives for this poll.
                after = None
                latest_synced_obj_time = None
                batches_latest = None
                resume_cursor = None
                poll.delete_poll_results()
                pull_refresh_from_archives.apply_async((poll.pk,), queue="sync")

            if resume_cursor is None:
                # Fresh (non-resumed) pull: fetch everything modified since the
                # last completed sync, up to "now".
                before = datetime_to_json_date(timezone.now())
                after = latest_synced_obj_time

            start = time.time()
            logger.info("Start fetching runs for poll #%d on org #%d" % (poll.pk, org.pk))

            poll_runs_query = client.get_runs(flow=poll.flow_uuid, after=after, before=before)
            fetches = poll_runs_query.iterfetches(retry_on_rate_exceed=True, resume_cursor=resume_cursor)

            try:
                fetch_start = time.time()
                for fetch in fetches:
                    logger.info(
                        "RapidPro API fetch for poll #%d "
                        "on org #%d %d - %d took %ds"
                        % (
                            poll.pk,
                            org.pk,
                            stats_dict["num_synced"],
                            stats_dict["num_synced"] + len(fetch),
                            time.time() - fetch_start,
                        )
                    )

                    # Preload contacts and existing results for this batch.
                    contacts_map, poll_results_map, poll_results_to_save_map = self._initiate_lookup_maps(
                        fetch, org, poll
                    )

                    for temba_run in fetch:
                        # Track the newest modified_on seen so far (as a JSON date string).
                        if batches_latest is None or temba_run.modified_on > json_date_to_datetime(batches_latest):
                            batches_latest = datetime_to_json_date(temba_run.modified_on.replace(tzinfo=pytz.utc))

                        contact_obj = contacts_map.get(temba_run.contact.uuid, None)
                        # Mutates poll_results_to_save_map and stats_dict in place.
                        self._process_run_poll_results(
                            org,
                            questions_uuids,
                            temba_run,
                            contact_obj,
                            poll_results_map,
                            poll_results_to_save_map,
                            stats_dict,
                        )

                    stats_dict["num_synced"] += len(fetch)
                    if progress_callback:
                        progress_callback(stats_dict["num_synced"])

                    self._save_new_poll_results_to_database(poll_results_to_save_map)

                    logger.info(
                        "Processed fetch of %d - %d "
                        "runs for poll #%d on org #%d"
                        % (stats_dict["num_synced"] - len(fetch), stats_dict["num_synced"], poll.pk, org.pk)
                    )
                    fetch_start = time.time()
                    logger.info("=" * 40)

                    # Pause the sync if we hit the per-run batch cap or are close
                    # to the lock expiring; cache the cursor so we resume later.
                    if (
                        stats_dict["num_synced"] >= Poll.POLL_RESULTS_MAX_SYNC_RUNS
                        or time.time() > lock_expiration
                    ):
                        poll.rebuild_poll_results_counts()

                        cursor = fetches.get_cursor()
                        self._mark_poll_results_sync_paused(org, poll, cursor, after, before, batches_latest)

                        logger.info(
                            "Break pull results for poll #%d on org #%d in %ds, "
                            " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"
                            " Objects: created %d, updated %d, ignored %d. "
                            "Before cursor %s"
                            % (
                                poll.pk,
                                org.pk,
                                time.time() - start,
                                after,
                                before,
                                batches_latest,
                                latest_synced_obj_time,
                                stats_dict["num_val_created"],
                                stats_dict["num_val_updated"],
                                stats_dict["num_val_ignored"],
                                cursor,
                            )
                        )

                        return (
                            stats_dict["num_val_created"],
                            stats_dict["num_val_updated"],
                            stats_dict["num_val_ignored"],
                            stats_dict["num_path_created"],
                            stats_dict["num_path_updated"],
                            stats_dict["num_path_ignored"],
                        )
            except TembaRateExceededError:
                # API rate limit hit: same pause-and-resume path as the batch cap.
                poll.rebuild_poll_results_counts()

                cursor = fetches.get_cursor()
                self._mark_poll_results_sync_paused(org, poll, cursor, after, before, batches_latest)

                logger.info(
                    "Break pull results for poll #%d on org #%d in %ds, "
                    " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"
                    " Objects: created %d, updated %d, ignored %d. "
                    "Before cursor %s"
                    % (
                        poll.pk,
                        org.pk,
                        time.time() - start,
                        after,
                        before,
                        batches_latest,
                        latest_synced_obj_time,
                        stats_dict["num_val_created"],
                        stats_dict["num_val_updated"],
                        stats_dict["num_val_ignored"],
                        cursor,
                    )
                )

                return (
                    stats_dict["num_val_created"],
                    stats_dict["num_val_updated"],
                    stats_dict["num_val_ignored"],
                    stats_dict["num_path_created"],
                    stats_dict["num_path_updated"],
                    stats_dict["num_path_ignored"],
                )

            # Advance the high-water mark only forward in time.
            if batches_latest is not None and (
                latest_synced_obj_time is None
                or json_date_to_datetime(latest_synced_obj_time) <= json_date_to_datetime(batches_latest)
            ):
                latest_synced_obj_time = batches_latest

            self._mark_poll_results_sync_completed(poll, org, latest_synced_obj_time)

            # from django.db import connection as db_connection, reset_queries
            # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
            # for q in slowest_queries:
            #     print "=" * 60
            #     print "\n\n\n"
            #     print "%s -- %s" % (q['time'], q['sql'])
            # reset_queries()

            logger.info(
                "Finished pulling results for poll #%d on org #%d runs in %ds, "
                "Times: sync_latest= %s,"
                "Objects: created %d, updated %d, ignored %d"
                % (
                    poll.pk,
                    org.pk,
                    time.time() - start,
                    latest_synced_obj_time,
                    stats_dict["num_val_created"],
                    stats_dict["num_val_updated"],
                    stats_dict["num_val_ignored"],
                )
            )

    return (
        stats_dict["num_val_created"],
        stats_dict["num_val_updated"],
        stats_dict["num_val_ignored"],
        stats_dict["num_path_created"],
        stats_dict["num_path_updated"],
        stats_dict["num_path_ignored"],
    )
def local_kwargs(self, org, remote):
    """
    Map a remote temba contact onto kwargs for the local Contact model.

    Returns None when the remote contact is not in the org's configured
    reporter group (such contacts are not synced); otherwise returns a dict of
    model field values. Boundary names are resolved to OSM ids via the cached
    boundary data, ward lookups keyed by the resolved district and district
    lookups keyed by the resolved state.

    NOTE(review): assumes the "reporter_group" config is set — a None value
    would raise AttributeError on .lower(); confirm org setup guarantees it.
    Same assumption for "female_label"/"male_label" when "gender_label" is set.
    """
    from ureport.utils import json_date_to_datetime

    reporter_group = org.get_config("reporter_group")
    contact_groups_names = [group.name.lower() for group in remote.groups]

    # Only contacts in the reporter group are synced locally.
    if not reporter_group.lower() in contact_groups_names:
        return None

    org_state_boundaries_data, org_district_boundaries_data, org_ward_boundaries_data = self.get_boundaries_data(
        org
    )
    # Maps configured field labels to the remote contact field keys.
    contact_fields = self.get_contact_fields(org)

    state = ""
    district = ""
    ward = ""
    state_field = org.get_config("state_label")
    if state_field:
        state_field = state_field.lower()
        if org.get_config("is_global"):
            # Global orgs keep the raw state name instead of a boundary OSM id.
            state_name = remote.fields.get(contact_fields.get(state_field), None)
            if state_name:
                state = state_name
        else:
            state_name = remote.fields.get(contact_fields.get(state_field), None)
            if state_name:
                state_name = state_name.lower()
                state = org_state_boundaries_data.get(state_name, "")

            district_field = org.get_config("district_label")
            if district_field:
                district_field = district_field.lower()
                district_name = remote.fields.get(contact_fields.get(district_field), None)
                if district_name:
                    district_name = district_name.lower()
                    # District names are only unique within a state, so key by state first.
                    district = org_district_boundaries_data.get(state, dict()).get(district_name, "")

            ward_field = org.get_config("ward_label")
            if ward_field:
                ward_field = ward_field.lower()
                ward_name = remote.fields.get(contact_fields.get(ward_field), None)
                if ward_name:
                    ward_name = ward_name.lower()
                    # Ward lookup is keyed by the resolved district OSM id.
                    ward = org_ward_boundaries_data.get(district, dict()).get(ward_name, "")

    registered_on = None
    registration_field = org.get_config("registration_label")
    if registration_field:
        registration_field = registration_field.lower()
        registered_on = remote.fields.get(contact_fields.get(registration_field), None)
        if registered_on:
            registered_on = json_date_to_datetime(registered_on)

    occupation = ""
    occupation_field = org.get_config("occupation_label")
    if occupation_field:
        occupation_field = occupation_field.lower()
        occupation = remote.fields.get(contact_fields.get(occupation_field), "")
        # Normalize a None field value back to the empty string.
        if not occupation:
            occupation = ""

    born = 0
    born_field = org.get_config("born_label")
    if born_field:
        born_field = born_field.lower()
        try:
            born = int(remote.fields.get(contact_fields.get(born_field), 0))

            # support only positive django integer field valid values
            if born < 0 or born > 2147483647:
                born = 0

        except ValueError:
            # Non-numeric value: keep born = 0.
            pass
        except TypeError:
            # Missing/None value: keep born = 0.
            pass

    gender = ""
    gender_field = org.get_config("gender_label")
    female_label = org.get_config("female_label")
    male_label = org.get_config("male_label")

    if gender_field:
        gender_field = gender_field.lower()
        gender = remote.fields.get(contact_fields.get(gender_field), "")

        # Only the configured male/female labels map to a gender; anything else is blank.
        if gender and gender.lower() == female_label.lower():
            gender = self.model.FEMALE
        elif gender and gender.lower() == male_label.lower():
            gender = self.model.MALE
        else:
            gender = ""

    return {
        "org": org,
        "uuid": remote.uuid,
        "gender": gender,
        "born": born,
        "occupation": occupation,
        "registered_on": registered_on,
        "state": state,
        "district": district,
        "ward": ward,
    }
def pull_results(self, poll, modified_after, modified_before, progress_callback=None):
    """
    Pull flow runs for `poll` from the RapidPro API and upsert PollResult rows.

    Python 2 revision: result processing is inline (no helper methods) and
    progress is reported with print statements. Sync state (last pull time,
    cursor, window, latest batch time) is cached directly via `cache.set`.
    A redis lock keyed on (org, flow) prevents concurrent pulls of one poll.

    NOTE(review): `modified_after` / `modified_before` are ignored — the
    fetch window comes from the cached pull params; confirm callers expect that.

    Returns a 6-tuple of counters:
    (num_val_created, num_val_updated, num_val_ignored,
     num_path_created, num_path_updated, num_path_ignored)
    """
    org = poll.org
    r = get_redis_connection()
    key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.flow_uuid)

    num_val_created = 0
    num_val_updated = 0
    num_val_ignored = 0
    num_path_created = 0
    num_path_updated = 0
    num_path_ignored = 0
    num_synced = 0

    if r.get(key):
        # Another worker holds the lock; skip this run entirely.
        print "Skipping pulling results for poll #%d on org #%d as it is still running" % (poll.pk, org.pk)
    else:
        with r.lock(key, timeout=Poll.POLL_SYNC_LOCK_TIMEOUT):
            client = self._get_client(org, 2)

            questions_uuids = poll.get_question_uuids()

            # ignore the TaskState time and use the time we stored in redis
            (after, before, latest_synced_obj_time, batches_latest, resume_cursor, pull_after_delete) = poll.get_pull_cached_params()

            if resume_cursor is None:
                # Fresh (non-resumed) pull: fetch runs modified since the last
                # completed sync, up to "now".
                before = datetime_to_json_date(timezone.now())
                after = latest_synced_obj_time

            if pull_after_delete is not None:
                # Results were deleted: restart from the beginning of time.
                after = None
                poll.delete_poll_results()

            start = time.time()
            print "Start fetching runs for poll #%d on org #%d" % (poll.pk, org.pk)

            poll_runs_query = client.get_runs(flow=poll.flow_uuid, after=after, before=before)
            fetches = poll_runs_query.iterfetches(retry_on_rate_exceed=True, resume_cursor=resume_cursor)

            fetch_start = time.time()
            for fetch in fetches:
                print "RapidPro API fetch for poll #%d on org #%d %d - %d took %ds" % (poll.pk, org.pk, num_synced, num_synced + len(fetch), time.time() - fetch_start)

                # Preload this batch's contacts and existing results in two queries.
                contact_uuids = [run.contact.uuid for run in fetch]
                contacts = Contact.objects.filter(org=org, uuid__in=contact_uuids)
                contacts_map = {c.uuid: c for c in contacts}

                existing_poll_results = PollResult.objects.filter(flow=poll.flow_uuid, org=poll.org_id, contact__in=contact_uuids)

                # contact uuid -> {ruleset uuid -> PollResult} for fast lookups.
                poll_results_map = defaultdict(dict)
                for res in existing_poll_results:
                    poll_results_map[res.contact][res.ruleset] = res

                # New rows accumulated here and bulk-created after the batch.
                poll_results_to_save_map = defaultdict(dict)

                for temba_run in fetch:
                    # Track the newest modified_on seen so far (as a JSON date string).
                    if batches_latest is None or temba_run.modified_on > json_date_to_datetime(batches_latest):
                        batches_latest = datetime_to_json_date(temba_run.modified_on.replace(tzinfo=pytz.utc))

                    flow_uuid = temba_run.flow.uuid
                    contact_uuid = temba_run.contact.uuid
                    completed = temba_run.exit_type == 'completed'

                    contact_obj = contacts_map.get(contact_uuid, None)

                    # Denormalize the contact's demographics onto each result row.
                    state = ''
                    district = ''
                    ward = ''
                    born = None
                    gender = None
                    if contact_obj is not None:
                        state = contact_obj.state
                        district = contact_obj.district
                        ward = contact_obj.ward
                        born = contact_obj.born
                        gender = contact_obj.gender

                    # Process answered values in chronological order.
                    temba_values = temba_run.values.values()
                    temba_values.sort(key=lambda val: val.time)

                    for temba_value in temba_values:
                        ruleset_uuid = temba_value.node
                        category = temba_value.category
                        text = temba_value.value
                        value_date = temba_value.time

                        existing_poll_result = poll_results_map.get(contact_uuid, dict()).get(ruleset_uuid, None)
                        poll_result_to_save = poll_results_to_save_map.get(contact_uuid, dict()).get(ruleset_uuid, None)

                        if existing_poll_result is not None:
                            # Update the DB row only if any field actually changed.
                            update_required = existing_poll_result.category != category or existing_poll_result.text != text
                            update_required = update_required or existing_poll_result.state != state
                            update_required = update_required or existing_poll_result.district != district
                            update_required = update_required or existing_poll_result.ward != ward
                            update_required = update_required or existing_poll_result.born != born
                            update_required = update_required or existing_poll_result.gender != gender
                            update_required = update_required or existing_poll_result.completed != completed

                            # if the reporter answered the step, check if this is a newer run
                            if existing_poll_result.date is not None:
                                update_required = update_required and (value_date > existing_poll_result.date)
                            else:
                                update_required = True

                            if update_required:
                                # update the db object
                                PollResult.objects.filter(pk=existing_poll_result.pk).update(category=category, text=text, state=state, district=district, ward=ward, date=value_date, born=born, gender=gender, completed=completed)

                                # update the map object as well
                                existing_poll_result.category = category
                                existing_poll_result.text = text
                                existing_poll_result.state = state
                                existing_poll_result.district = district
                                existing_poll_result.ward = ward
                                existing_poll_result.date = value_date
                                existing_poll_result.born = born
                                existing_poll_result.gender = gender
                                existing_poll_result.completed = completed

                                poll_results_map[contact_uuid][ruleset_uuid] = existing_poll_result

                                num_val_updated += 1
                            else:
                                num_val_ignored += 1

                        elif poll_result_to_save is not None:
                            # Already queued for creation in this batch: keep the newer/different one.
                            replace_save_map = poll_result_to_save.category != category or poll_result_to_save.text != text
                            replace_save_map = replace_save_map or poll_result_to_save.state != state
                            replace_save_map = replace_save_map or poll_result_to_save.district != district
                            replace_save_map = replace_save_map or poll_result_to_save.ward != ward
                            replace_save_map = replace_save_map or poll_result_to_save.born != born
                            replace_save_map = replace_save_map or poll_result_to_save.gender != gender
                            replace_save_map = replace_save_map or poll_result_to_save.completed != completed

                            # replace if the step is newer
                            if poll_result_to_save.date is not None:
                                replace_save_map = replace_save_map and (value_date > poll_result_to_save.date)

                            if replace_save_map:
                                result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid, contact=contact_uuid, category=category, text=text, state=state, district=district, ward=ward, born=born, gender=gender, date=value_date, completed=completed)

                                poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                            num_val_ignored += 1
                        else:
                            # First sight of this (contact, ruleset): queue a new row.
                            result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid, contact=contact_uuid, category=category, text=text, state=state, district=district, ward=ward, born=born, gender=gender, date=value_date, completed=completed)

                            poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                            num_val_created += 1

                    # Path steps record "reached but not answered" nodes
                    # (category=None, empty text) for known question rulesets.
                    for temba_path in temba_run.path:
                        ruleset_uuid = temba_path.node
                        category = None
                        text = ""
                        value_date = temba_path.time

                        if ruleset_uuid in questions_uuids:
                            existing_poll_result = poll_results_map.get(contact_uuid, dict()).get(ruleset_uuid, None)
                            poll_result_to_save = poll_results_to_save_map.get(contact_uuid, dict()).get(ruleset_uuid, None)

                            if existing_poll_result is not None:
                                # A newer path visit overwrites any older result.
                                if existing_poll_result.date is None or value_date > existing_poll_result.date:
                                    # update the db object
                                    PollResult.objects.filter(pk=existing_poll_result.pk).update(category=category, text=text, state=state, district=district, ward=ward, date=value_date, born=born, gender=gender, completed=completed)

                                    # update the map object as well
                                    existing_poll_result.category = category
                                    existing_poll_result.text = text
                                    existing_poll_result.state = state
                                    existing_poll_result.district = district
                                    existing_poll_result.ward = ward
                                    existing_poll_result.date = value_date
                                    existing_poll_result.born = born
                                    existing_poll_result.gender = gender
                                    existing_poll_result.completed = completed

                                    poll_results_map[contact_uuid][ruleset_uuid] = existing_poll_result

                                    num_path_updated += 1
                                else:
                                    num_path_ignored += 1

                            elif poll_result_to_save is not None:
                                if value_date > poll_result_to_save.date:
                                    result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid, contact=contact_uuid, category=category, text=text, state=state, district=district, ward=ward, born=born, gender=gender, date=value_date, completed=completed)

                                    poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                                num_path_ignored += 1
                            else:
                                result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid, contact=contact_uuid, category=category, text=text, state=state, district=district, ward=ward, born=born, gender=gender, date=value_date, completed=completed)

                                poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                                num_path_created += 1
                        else:
                            num_path_ignored += 1

                num_synced += len(fetch)
                if progress_callback:
                    progress_callback(num_synced)

                # Flush this batch's queued new rows in one bulk insert.
                new_poll_results = []
                for c_key in poll_results_to_save_map.keys():
                    for r_key in poll_results_to_save_map.get(c_key, dict()):
                        obj_to_create = poll_results_to_save_map.get(c_key, dict()).get(r_key, None)
                        if obj_to_create is not None:
                            new_poll_results.append(obj_to_create)
                PollResult.objects.bulk_create(new_poll_results)

                print "Processed fetch of %d - %d runs for poll #%d on org #%d" % (num_synced - len(fetch), num_synced, poll.pk, org.pk)
                fetch_start = time.time()
                print "=" * 40

                # Pause when the per-run batch cap is reached; cache the cursor
                # and window so the next task invocation resumes here.
                if num_synced >= Poll.POLL_RESULTS_MAX_SYNC_RUNS:
                    poll.rebuild_poll_results_counts()

                    cursor = fetches.get_cursor()
                    cache.set(Poll.POLL_RESULTS_LAST_PULL_CURSOR % (org.pk, poll.flow_uuid), cursor, None)
                    cache.set(Poll.POLL_RESULTS_CURSOR_AFTER_CACHE_KEY % (org.pk, poll.flow_uuid), after, None)
                    cache.set(Poll.POLL_RESULTS_CURSOR_BEFORE_CACHE_KEY % (org.pk, poll.flow_uuid), before, None)
                    cache.set(Poll.POLL_RESULTS_BATCHES_LATEST_CACHE_KEY % (org.pk, poll.flow_uuid), batches_latest, None)

                    print "Break pull results for poll #%d on org #%d in %ds, "\
                        " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"\
                        " Objects: created %d, updated %d, ignored %d. " \
                        "Before cursor %s" % (poll.pk, org.pk, time.time() - start, after, before, batches_latest, latest_synced_obj_time, num_val_created, num_val_updated, num_val_ignored, cursor)

                    return (num_val_created, num_val_updated, num_val_ignored, num_path_created, num_path_updated, num_path_ignored)

            # Advance the high-water mark only forward in time.
            if batches_latest is not None and (latest_synced_obj_time is None or json_date_to_datetime(latest_synced_obj_time) <= json_date_to_datetime(batches_latest)):
                latest_synced_obj_time = batches_latest

            # update the time for this poll from which we fetch next time
            cache.set(Poll.POLL_RESULTS_LAST_PULL_CACHE_KEY % (org.pk, poll.flow_uuid), latest_synced_obj_time, None)

            # clear the saved cursor
            cache.delete(Poll.POLL_RESULTS_LAST_PULL_CURSOR % (org.pk, poll.flow_uuid))

            # from django.db import connection as db_connection, reset_queries
            # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
            # for q in slowest_queries:
            #     print "=" * 60
            #     print "\n\n\n"
            #     print "%s -- %s" % (q['time'], q['sql'])
            # reset_queries()

            print "Finished pulling results for poll #%d on org #%d runs in %ds, " \
                "Times: sync_latest= %s," \
                "Objects: created %d, updated %d, ignored %d" % (poll.pk, org.pk, time.time() - start, latest_synced_obj_time, num_val_created, num_val_updated, num_val_ignored)

    return num_val_created, num_val_updated, num_val_ignored, num_path_created, num_path_updated, num_path_ignored
def pull_results(self, poll, modified_after, modified_before, progress_callback=None):
    """
    Pull flow runs for `poll` from the RapidPro API and upsert PollResult rows.

    Duplicate Python 2 revision of the pull loop above: inline value/path
    processing, print-based progress reporting, and direct `cache.set`
    bookkeeping of the resume cursor, fetch window and latest batch time.
    A redis lock keyed on (org, flow) prevents concurrent pulls of one poll.

    NOTE(review): the `modified_after` / `modified_before` parameters are
    unused — the window is taken from the cached pull params.

    Returns a 6-tuple of counters:
    (num_val_created, num_val_updated, num_val_ignored,
     num_path_created, num_path_updated, num_path_ignored)
    """
    org = poll.org
    r = get_redis_connection()
    key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.flow_uuid)

    num_val_created = 0
    num_val_updated = 0
    num_val_ignored = 0
    num_path_created = 0
    num_path_updated = 0
    num_path_ignored = 0
    num_synced = 0

    if r.get(key):
        # Lock already held by another worker: skip.
        print "Skipping pulling results for poll #%d on org #%d as it is still running" % (poll.pk, org.pk)
    else:
        with r.lock(key, timeout=Poll.POLL_SYNC_LOCK_TIMEOUT):
            client = self._get_client(org, 2)

            questions_uuids = poll.get_question_uuids()

            # ignore the TaskState time and use the time we stored in redis
            (after, before, latest_synced_obj_time, batches_latest, resume_cursor, pull_after_delete) = poll.get_pull_cached_params()

            if resume_cursor is None:
                # Not resuming: fetch runs modified since the last completed sync.
                before = datetime_to_json_date(timezone.now())
                after = latest_synced_obj_time

            if pull_after_delete is not None:
                # Results were deleted: restart from the beginning of time.
                after = None
                poll.delete_poll_results()

            start = time.time()
            print "Start fetching runs for poll #%d on org #%d" % (poll.pk, org.pk)

            poll_runs_query = client.get_runs(flow=poll.flow_uuid, after=after, before=before)
            fetches = poll_runs_query.iterfetches(retry_on_rate_exceed=True, resume_cursor=resume_cursor)

            fetch_start = time.time()
            for fetch in fetches:
                print "RapidPro API fetch for poll #%d on org #%d %d - %d took %ds" % (poll.pk, org.pk, num_synced, num_synced + len(fetch), time.time() - fetch_start)

                # Batch-load contacts and their existing results for fast lookup.
                contact_uuids = [run.contact.uuid for run in fetch]
                contacts = Contact.objects.filter(org=org, uuid__in=contact_uuids)
                contacts_map = {c.uuid: c for c in contacts}

                existing_poll_results = PollResult.objects.filter(flow=poll.flow_uuid, org=poll.org_id, contact__in=contact_uuids)

                # contact uuid -> {ruleset uuid -> PollResult}
                poll_results_map = defaultdict(dict)
                for res in existing_poll_results:
                    poll_results_map[res.contact][res.ruleset] = res

                # New rows queued here and bulk-created after the batch.
                poll_results_to_save_map = defaultdict(dict)

                for temba_run in fetch:
                    # Keep the newest modified_on seen (as a JSON date string).
                    if batches_latest is None or temba_run.modified_on > json_date_to_datetime(batches_latest):
                        batches_latest = datetime_to_json_date(temba_run.modified_on.replace(tzinfo=pytz.utc))

                    flow_uuid = temba_run.flow.uuid
                    contact_uuid = temba_run.contact.uuid
                    completed = temba_run.exit_type == 'completed'

                    contact_obj = contacts_map.get(contact_uuid, None)

                    # Denormalized demographics copied onto each result row.
                    state = ''
                    district = ''
                    ward = ''
                    born = None
                    gender = None
                    if contact_obj is not None:
                        state = contact_obj.state
                        district = contact_obj.district
                        ward = contact_obj.ward
                        born = contact_obj.born
                        gender = contact_obj.gender

                    # Answered values, processed oldest first.
                    temba_values = temba_run.values.values()
                    temba_values.sort(key=lambda val: val.time)

                    for temba_value in temba_values:
                        ruleset_uuid = temba_value.node
                        category = temba_value.category
                        text = temba_value.value
                        value_date = temba_value.time

                        existing_poll_result = poll_results_map.get(contact_uuid, dict()).get(ruleset_uuid, None)
                        poll_result_to_save = poll_results_to_save_map.get(contact_uuid, dict()).get(ruleset_uuid, None)

                        if existing_poll_result is not None:
                            # Only touch the DB when a field actually changed.
                            update_required = existing_poll_result.category != category or existing_poll_result.text != text
                            update_required = update_required or existing_poll_result.state != state
                            update_required = update_required or existing_poll_result.district != district
                            update_required = update_required or existing_poll_result.ward != ward
                            update_required = update_required or existing_poll_result.born != born
                            update_required = update_required or existing_poll_result.gender != gender
                            update_required = update_required or existing_poll_result.completed != completed

                            # if the reporter answered the step, check if this is a newer run
                            if existing_poll_result.date is not None:
                                update_required = update_required and (value_date > existing_poll_result.date)
                            else:
                                update_required = True

                            if update_required:
                                # update the db object
                                PollResult.objects.filter(pk=existing_poll_result.pk).update(category=category, text=text, state=state, district=district, ward=ward, date=value_date, born=born, gender=gender, completed=completed)

                                # update the map object as well
                                existing_poll_result.category = category
                                existing_poll_result.text = text
                                existing_poll_result.state = state
                                existing_poll_result.district = district
                                existing_poll_result.ward = ward
                                existing_poll_result.date = value_date
                                existing_poll_result.born = born
                                existing_poll_result.gender = gender
                                existing_poll_result.completed = completed

                                poll_results_map[contact_uuid][ruleset_uuid] = existing_poll_result

                                num_val_updated += 1
                            else:
                                num_val_ignored += 1

                        elif poll_result_to_save is not None:
                            # Already queued in this batch: replace with newer/different data.
                            replace_save_map = poll_result_to_save.category != category or poll_result_to_save.text != text
                            replace_save_map = replace_save_map or poll_result_to_save.state != state
                            replace_save_map = replace_save_map or poll_result_to_save.district != district
                            replace_save_map = replace_save_map or poll_result_to_save.ward != ward
                            replace_save_map = replace_save_map or poll_result_to_save.born != born
                            replace_save_map = replace_save_map or poll_result_to_save.gender != gender
                            replace_save_map = replace_save_map or poll_result_to_save.completed != completed

                            # replace if the step is newer
                            if poll_result_to_save.date is not None:
                                replace_save_map = replace_save_map and (value_date > poll_result_to_save.date)

                            if replace_save_map:
                                result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid, contact=contact_uuid, category=category, text=text, state=state, district=district, ward=ward, born=born, gender=gender, date=value_date, completed=completed)

                                poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                            num_val_ignored += 1
                        else:
                            # First result for this (contact, ruleset): queue a new row.
                            result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid, contact=contact_uuid, category=category, text=text, state=state, district=district, ward=ward, born=born, gender=gender, date=value_date, completed=completed)

                            poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                            num_val_created += 1

                    # Path steps: nodes the contact reached without answering
                    # (category=None, empty text), for known question rulesets.
                    for temba_path in temba_run.path:
                        ruleset_uuid = temba_path.node
                        category = None
                        text = ""
                        value_date = temba_path.time

                        if ruleset_uuid in questions_uuids:
                            existing_poll_result = poll_results_map.get(contact_uuid, dict()).get(ruleset_uuid, None)
                            poll_result_to_save = poll_results_to_save_map.get(contact_uuid, dict()).get(ruleset_uuid, None)

                            if existing_poll_result is not None:
                                # A newer visit overwrites an older stored result.
                                if existing_poll_result.date is None or value_date > existing_poll_result.date:
                                    # update the db object
                                    PollResult.objects.filter(pk=existing_poll_result.pk).update(category=category, text=text, state=state, district=district, ward=ward, date=value_date, born=born, gender=gender, completed=completed)

                                    # update the map object as well
                                    existing_poll_result.category = category
                                    existing_poll_result.text = text
                                    existing_poll_result.state = state
                                    existing_poll_result.district = district
                                    existing_poll_result.ward = ward
                                    existing_poll_result.date = value_date
                                    existing_poll_result.born = born
                                    existing_poll_result.gender = gender
                                    existing_poll_result.completed = completed

                                    poll_results_map[contact_uuid][ruleset_uuid] = existing_poll_result

                                    num_path_updated += 1
                                else:
                                    num_path_ignored += 1

                            elif poll_result_to_save is not None:
                                if value_date > poll_result_to_save.date:
                                    result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid, contact=contact_uuid, category=category, text=text, state=state, district=district, ward=ward, born=born, gender=gender, date=value_date, completed=completed)

                                    poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                                num_path_ignored += 1
                            else:
                                result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid, contact=contact_uuid, category=category, text=text, state=state, district=district, ward=ward, born=born, gender=gender, date=value_date, completed=completed)

                                poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                                num_path_created += 1
                        else:
                            num_path_ignored += 1

                num_synced += len(fetch)
                if progress_callback:
                    progress_callback(num_synced)

                # One bulk insert for all new rows queued in this batch.
                new_poll_results = []
                for c_key in poll_results_to_save_map.keys():
                    for r_key in poll_results_to_save_map.get(c_key, dict()):
                        obj_to_create = poll_results_to_save_map.get(c_key, dict()).get(r_key, None)
                        if obj_to_create is not None:
                            new_poll_results.append(obj_to_create)
                PollResult.objects.bulk_create(new_poll_results)

                print "Processed fetch of %d - %d runs for poll #%d on org #%d" % (num_synced - len(fetch), num_synced, poll.pk, org.pk)
                fetch_start = time.time()
                print "=" * 40

                # Batch cap reached: cache resume state and hand back to the scheduler.
                if num_synced >= Poll.POLL_RESULTS_MAX_SYNC_RUNS:
                    poll.rebuild_poll_results_counts()

                    cursor = fetches.get_cursor()
                    cache.set(Poll.POLL_RESULTS_LAST_PULL_CURSOR % (org.pk, poll.flow_uuid), cursor, None)
                    cache.set(Poll.POLL_RESULTS_CURSOR_AFTER_CACHE_KEY % (org.pk, poll.flow_uuid), after, None)
                    cache.set(Poll.POLL_RESULTS_CURSOR_BEFORE_CACHE_KEY % (org.pk, poll.flow_uuid), before, None)
                    cache.set(Poll.POLL_RESULTS_BATCHES_LATEST_CACHE_KEY % (org.pk, poll.flow_uuid), batches_latest, None)

                    print "Break pull results for poll #%d on org #%d in %ds, "\
                        " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"\
                        " Objects: created %d, updated %d, ignored %d. " \
                        "Before cursor %s" % (poll.pk, org.pk, time.time() - start, after, before, batches_latest, latest_synced_obj_time, num_val_created, num_val_updated, num_val_ignored, cursor)

                    return (num_val_created, num_val_updated, num_val_ignored, num_path_created, num_path_updated, num_path_ignored)

            # Advance the high-water mark only forward in time.
            if batches_latest is not None and (latest_synced_obj_time is None or json_date_to_datetime(latest_synced_obj_time) <= json_date_to_datetime(batches_latest)):
                latest_synced_obj_time = batches_latest

            # update the time for this poll from which we fetch next time
            cache.set(Poll.POLL_RESULTS_LAST_PULL_CACHE_KEY % (org.pk, poll.flow_uuid), latest_synced_obj_time, None)

            # clear the saved cursor
            cache.delete(Poll.POLL_RESULTS_LAST_PULL_CURSOR % (org.pk, poll.flow_uuid))

            # from django.db import connection as db_connection, reset_queries
            # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
            # for q in slowest_queries:
            #     print "=" * 60
            #     print "\n\n\n"
            #     print "%s -- %s" % (q['time'], q['sql'])
            # reset_queries()

            print "Finished pulling results for poll #%d on org #%d runs in %ds, " \
                "Times: sync_latest= %s," \
                "Objects: created %d, updated %d, ignored %d" % (poll.pk, org.pk, time.time() - start, latest_synced_obj_time, num_val_created, num_val_updated, num_val_ignored)

    return num_val_created, num_val_updated, num_val_ignored, num_path_created, num_path_updated, num_path_ignored
def kwargs_from_temba(cls, org, temba_contact):
    """
    Build the local Contact model kwargs from a remote temba contact.

    State/district names are resolved to Boundary OSM ids (district lookup is
    scoped to the matched state boundary as parent); registration date,
    occupation, born year and gender come from the org-configured contact
    field labels. Returns a dict suitable for Contact(**kwargs).

    FIX: clamp `born` to Django's positive IntegerField range (0..2147483647),
    consistent with the `local_kwargs` syncer implementations in this file —
    out-of-range values like "-1" or "2147483648" now yield born=0 instead of
    an invalid value that would overflow the DB column on save.
    """
    from ureport.utils import json_date_to_datetime

    state = ''
    district = ''

    state_field = org.get_config('state_label')
    if state_field:
        if org.get_config('is_global'):
            # Global orgs keep the raw state name rather than an OSM boundary id.
            state_name = temba_contact.fields.get(cls.find_contact_field_key(org, state_field), None)
            if state_name:
                state = state_name
        else:
            state_name = temba_contact.fields.get(cls.find_contact_field_key(org, state_field), None)
            state_boundary = Boundary.objects.filter(org=org, level=1, name__iexact=state_name).first()
            if state_boundary:
                state = state_boundary.osm_id

            district_field = org.get_config('district_label')
            if district_field:
                district_name = temba_contact.fields.get(cls.find_contact_field_key(org, district_field), None)
                # District names are only unique within a state: constrain by parent.
                district_boundary = Boundary.objects.filter(org=org, level=2, name__iexact=district_name, parent=state_boundary).first()
                if district_boundary:
                    district = district_boundary.osm_id

    registered_on = None
    registration_field = org.get_config('registration_label')
    if registration_field:
        registered_on = temba_contact.fields.get(cls.find_contact_field_key(org, registration_field), None)
        if registered_on:
            registered_on = json_date_to_datetime(registered_on)

    occupation = ''
    occupation_field = org.get_config('occupation_label')
    if occupation_field:
        occupation = temba_contact.fields.get(cls.find_contact_field_key(org, occupation_field), '')
        # Normalize a None field value back to the empty string.
        if not occupation:
            occupation = ''

    born = 0
    born_field = org.get_config('born_label')
    if born_field:
        try:
            born = int(temba_contact.fields.get(cls.find_contact_field_key(org, born_field), 0))

            # support only positive django integer field valid values
            if born < 0 or born > 2147483647:
                born = 0

        except ValueError:
            # Non-numeric value: keep born = 0.
            pass
        except TypeError:
            # Missing/None value: keep born = 0.
            pass

    gender = ''
    gender_field = org.get_config('gender_label')
    female_label = org.get_config('female_label')
    male_label = org.get_config('male_label')

    if gender_field:
        gender = temba_contact.fields.get(cls.find_contact_field_key(org, gender_field), '')

        # Only the configured male/female labels map to a gender; otherwise blank.
        if gender and gender.lower() == female_label.lower():
            gender = cls.FEMALE
        elif gender and gender.lower() == male_label.lower():
            gender = cls.MALE
        else:
            gender = ''

    return dict(org=org, uuid=temba_contact.uuid, gender=gender, born=born,
                occupation=occupation, registered_on=registered_on,
                district=district, state=state)
def local_kwargs(self, org, remote):
    """
    Map a remote TembaContact to the kwargs used to create/update the local
    Contact for this backend.

    Boundary fields (state/district/ward) hold admin paths like
    "Nigeria > Lagos > Ikeja"; only the last path segment is matched
    (lowercased) against the org's cached boundary name -> osm_id maps.
    """
    from ureport.utils import json_date_to_datetime

    org_state_boundaries_data, org_district_boundaries_data, org_ward_boundaries_data = self.get_boundaries_data(
        org
    )

    state = ""
    district = ""
    ward = ""
    state_field = org.get_config("%s.state_label" % self.backend.slug, default="")
    if state_field:
        state_field = state_field.lower()
        if org.get_config("common.is_global"):
            # global orgs store the raw value; no boundary lookup
            state_name = remote.fields.get(state_field, None)
            if state_name:
                state = state_name
        else:
            state_path = remote.fields.get(state_field, None)
            if state_path:
                # keep only the last segment of the admin boundary path
                state_name = state_path.split(" > ")[-1]
                state_name = state_name.lower()
                state = org_state_boundaries_data.get(state_name, "")

            district_field = org.get_config("%s.district_label" % self.backend.slug, default="")
            if district_field:
                district_field = district_field.lower()
                district_path = remote.fields.get(district_field, None)
                if district_path:
                    district_name = district_path.split(" > ")[-1]
                    district_name = district_name.lower()
                    # districts are looked up within the matched state
                    district = org_district_boundaries_data.get(state, dict()).get(district_name, "")

                ward_field = org.get_config("%s.ward_label" % self.backend.slug, default="")
                if ward_field:
                    ward_field = ward_field.lower()
                    ward_path = remote.fields.get(ward_field, None)
                    if ward_path:
                        ward_name = ward_path.split(" > ")[-1]
                        ward_name = ward_name.lower()
                        # wards are looked up within the matched district
                        ward = org_ward_boundaries_data.get(district, dict()).get(ward_name, "")

    registered_on = None
    registration_field = org.get_config("%s.registration_label" % self.backend.slug, default="")
    if registration_field:
        registration_field = registration_field.lower()
        registered_on = remote.fields.get(registration_field, None)
        if registered_on:
            registered_on = json_date_to_datetime(registered_on)

    occupation = ""
    occupation_field = org.get_config("%s.occupation_label" % self.backend.slug, default="")
    if occupation_field:
        occupation_field = occupation_field.lower()
        occupation = remote.fields.get(occupation_field, "")
        if not occupation:
            # normalize None/falsy field values back to empty string
            occupation = ""

    born = 0
    born_field = org.get_config("%s.born_label" % self.backend.slug, default="")
    if born_field:
        born_field = born_field.lower()
        try:
            born = int(remote.fields.get(born_field, 0))

            # support only positive django integer field valid values
            if born < 0 or born > 2147483647:
                born = 0

        except ValueError:
            pass  # non-numeric value, keep default 0
        except TypeError:
            pass  # field value was None, keep default 0

    gender = ""
    gender_field = org.get_config("%s.gender_label" % self.backend.slug, default="")
    female_label = org.get_config("%s.female_label" % self.backend.slug, default="")
    male_label = org.get_config("%s.male_label" % self.backend.slug, default="")

    if gender_field:
        gender_field = gender_field.lower()
        gender = remote.fields.get(gender_field, "")
        # only the org-configured male/female labels are recognized
        if gender and gender.lower() == female_label.lower():
            gender = self.model.FEMALE
        elif gender and gender.lower() == male_label.lower():
            gender = self.model.MALE
        else:
            gender = ""

    return {
        "backend": self.backend,
        "org": org,
        "uuid": remote.uuid,
        "gender": gender,
        "born": born,
        "occupation": occupation,
        "registered_on": registered_on,
        "state": state,
        "district": district,
        "ward": ward,
    }
def local_kwargs(self, org, remote):
    """
    Map a remote TembaContact to local Contact kwargs, or return None when
    the contact is not in the org's configured reporter group.

    Contact field labels are resolved through the backend's contact-field
    map, and boundary names are matched (lowercased) against the org's
    cached name -> osm_id maps.
    """
    from ureport.utils import json_date_to_datetime

    reporter_group = org.get_config('reporter_group')
    contact_groups_names = [group.name.lower() for group in remote.groups]

    # only contacts in the reporter group are synced locally
    if not reporter_group.lower() in contact_groups_names:
        return None

    org_state_boundaries_data, org_district_boundaries_data, org_ward_boundaries_data = self.get_boundaries_data(org)

    contact_fields = self.get_contact_fields(org)

    state = ''
    district = ''
    ward = ''
    state_field = org.get_config('state_label')
    if state_field:
        state_field = state_field.lower()
        if org.get_config('is_global'):
            # global orgs store the raw value; no boundary lookup
            state_name = remote.fields.get(contact_fields.get(state_field), None)
            if state_name:
                state = state_name
        else:
            state_name = remote.fields.get(contact_fields.get(state_field), None)
            if state_name:
                state_name = state_name.lower()
                state = org_state_boundaries_data.get(state_name, '')

            district_field = org.get_config('district_label')
            if district_field:
                district_field = district_field.lower()
                district_name = remote.fields.get(contact_fields.get(district_field), None)
                if district_name:
                    district_name = district_name.lower()
                    # districts are looked up within the matched state
                    district = org_district_boundaries_data.get(state, dict()).get(district_name, '')

                ward_field = org.get_config('ward_label')
                if ward_field:
                    ward_field = ward_field.lower()
                    ward_name = remote.fields.get(contact_fields.get(ward_field), None)
                    if ward_name:
                        ward_name = ward_name.lower()
                        # wards are looked up within the matched district
                        ward = org_ward_boundaries_data.get(district, dict()).get(ward_name, '')

    registered_on = None
    registration_field = org.get_config('registration_label')
    if registration_field:
        registration_field = registration_field.lower()
        registered_on = remote.fields.get(contact_fields.get(registration_field), None)
        if registered_on:
            registered_on = json_date_to_datetime(registered_on)

    occupation = ''
    occupation_field = org.get_config('occupation_label')
    if occupation_field:
        occupation_field = occupation_field.lower()
        occupation = remote.fields.get(contact_fields.get(occupation_field), '')
        if not occupation:
            # normalize None/falsy field values back to empty string
            occupation = ''

    born = 0
    born_field = org.get_config('born_label')
    if born_field:
        born_field = born_field.lower()
        try:
            born = int(remote.fields.get(contact_fields.get(born_field), 0))

            # support only positive django integer field valid values
            if born < 0 or born > 2147483647:
                born = 0

        except ValueError:
            pass  # non-numeric value, keep default 0
        except TypeError:
            pass  # field value was None, keep default 0

    gender = ''
    gender_field = org.get_config('gender_label')
    female_label = org.get_config('female_label')
    male_label = org.get_config('male_label')

    if gender_field:
        gender_field = gender_field.lower()
        gender = remote.fields.get(contact_fields.get(gender_field), '')
        # only the org-configured male/female labels are recognized
        if gender and gender.lower() == female_label.lower():
            gender = self.model.FEMALE
        elif gender and gender.lower() == male_label.lower():
            gender = self.model.MALE
        else:
            gender = ''

    return {
        'org': org,
        'uuid': remote.uuid,
        'gender': gender,
        'born': born,
        'occupation': occupation,
        'registered_on': registered_on,
        'state': state,
        'district': district,
        'ward': ward
    }
def _process_run_poll_results(
    self,
    org,
    flow_uuid,
    questions_uuids,
    result,
    contact_obj,
    existing_db_poll_results_map,
    poll_results_to_save_map,
    stats_dict,
):
    """
    Process one flow-results response row into a PollResult: update the
    existing DB row, replace a pending unsaved object, or queue a new one.
    Mutates the passed maps and stats_dict in place.

    Row layout used here: result[0]=timestamp, result[2]=contact uuid,
    result[4]=question uuid, result[5]=response value.
    NOTE(review): questions_uuids is unused in this method; kept for
    signature parity with the caller — confirm before removing.
    """
    contact_uuid = result[2]
    completed = True  # every row from this endpoint is treated as completed

    # demographic segmentation comes from the locally synced contact, if any
    state = ""
    district = ""
    ward = ""
    born = None
    gender = None
    if contact_obj is not None:
        state = contact_obj.state
        district = contact_obj.district
        ward = contact_obj.ward
        born = contact_obj.born
        gender = contact_obj.gender

    value_date = json_date_to_datetime(result[0])
    ruleset_uuid = result[4]
    # NOTE(review): category and text are both taken from result[5] — the raw
    # response value doubles as the category here; confirm this is intended.
    category = result[5]
    text = result[5]

    existing_poll_result = existing_db_poll_results_map.get(contact_uuid, dict()).get(ruleset_uuid, None)
    poll_result_to_save = poll_results_to_save_map.get(contact_uuid, dict()).get(ruleset_uuid, None)

    if existing_poll_result is not None:
        update_required = self._check_update_required(
            existing_poll_result, category, text, state, district, ward, born, gender, completed, value_date
        )
        if update_required:
            # update the db object
            PollResult.objects.filter(pk=existing_poll_result.pk).update(
                category=category,
                text=text,
                state=state,
                district=district,
                ward=ward,
                date=value_date,
                born=born,
                gender=gender,
                completed=completed,
            )

            # update the map object as well so later rows compare against
            # the fresh values
            existing_poll_result.category = category
            existing_poll_result.text = text
            existing_poll_result.state = state
            existing_poll_result.district = district
            existing_poll_result.ward = ward
            existing_poll_result.date = value_date
            existing_poll_result.born = born
            existing_poll_result.gender = gender
            existing_poll_result.completed = completed

            existing_db_poll_results_map[contact_uuid][ruleset_uuid] = existing_poll_result

            stats_dict["num_val_updated"] += 1
        else:
            stats_dict["num_val_ignored"] += 1
    elif poll_result_to_save is not None:
        # a result for this contact/question is already queued for saving;
        # replace it only when this row carries different data
        replace_save_map = self._check_update_required(
            poll_result_to_save, category, text, state, district, ward, born, gender, completed, value_date
        )
        if replace_save_map:
            result_obj = PollResult(
                org=org,
                flow=flow_uuid,
                ruleset=ruleset_uuid,
                contact=contact_uuid,
                category=category,
                text=text,
                state=state,
                district=district,
                ward=ward,
                born=born,
                gender=gender,
                date=value_date,
                completed=completed,
            )

            poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

        stats_dict["num_val_ignored"] += 1
    else:
        # first row seen for this contact/question pair: queue a new object
        result_obj = PollResult(
            org=org,
            flow=flow_uuid,
            ruleset=ruleset_uuid,
            contact=contact_uuid,
            category=category,
            text=text,
            state=state,
            district=district,
            ward=ward,
            born=born,
            gender=gender,
            date=value_date,
            completed=completed,
        )

        poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

        stats_dict["num_val_created"] += 1
def pull_results(self, poll, modified_after, modified_before, progress_callback=None):
    """
    Pull flow results for a poll from the remote flow-results API and sync
    them into PollResult objects, page by page.

    A redis lock ensures only one sync per (org, flow) runs at a time; the
    sync pauses (persisting a resume cursor) when it hits the per-run cap or
    when the lock is close to expiring.
    NOTE(review): modified_after/modified_before are unused — the sync
    window comes from params cached on the poll instead; confirm intended.

    Returns a 6-tuple of counters:
    (created, updated, ignored, path_created, path_updated, path_ignored).
    """
    org = poll.org
    r = get_redis_connection()
    key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.flow_uuid)

    stats_dict = dict(
        num_val_created=0,
        num_val_updated=0,
        num_val_ignored=0,
        num_path_created=0,
        num_path_updated=0,
        num_path_ignored=0,
        num_synced=0,
    )

    if r.get(key):
        logger.info("Skipping pulling results for poll #%d on org #%d as it is still running" % (poll.pk, org.pk))
    else:
        with r.lock(key, timeout=Poll.POLL_SYNC_LOCK_TIMEOUT):
            # stop fetching before the lock actually expires so we can pause
            # cleanly and persist a resume cursor
            lock_expiration = time.time() + 0.8 * Poll.POLL_SYNC_LOCK_TIMEOUT

            poll_results_url = "https://go.votomobile.org/flow-results/packages/%s/responses" % poll.flow_uuid

            headers = {
                "Content-type": "application/json",
                "Accept": "application/json",
                "Authorization": "Token %s" % self.backend.api_token,
            }

            results = []
            questions_uuids = poll.get_question_uuids()

            # ignore the TaskState time and use the time we stored in redis
            (
                after,
                before,
                latest_synced_obj_time,
                batches_latest,
                resume_cursor,
                pull_after_delete,
            ) = poll.get_pull_cached_params()

            if pull_after_delete is not None:
                # results were deleted remotely/locally: restart from scratch
                after = None
                latest_synced_obj_time = None
                batches_latest = None
                resume_cursor = None
                poll.delete_poll_results()

            if resume_cursor is None:
                before = datetime_to_json_date(timezone.now())
                after = latest_synced_obj_time

            start = time.time()

            logger.info("Start fetching runs for poll #%d on org #%d" % (poll.pk, org.pk))

            params = dict(
                filter={"end-timestamp": before, "start-timestamp": after}, page={"beforeCursor": resume_cursor}
            )

            while poll_results_url:
                response = requests.request("GET", poll_results_url, headers=headers, params=params)
                response_json = response.json()

                results = response_json["data"]["attributes"]["responses"]
                # next-page link; falsy when this was the last page
                poll_results_url = response_json["data"]["relationships"]["links"]["next"]

                contacts_map, poll_results_map, poll_results_to_save_map = self._initiate_lookup_maps(
                    results, org, poll
                )

                for result in results:
                    # track the newest response timestamp seen so far
                    if batches_latest is None or json_date_to_datetime(result[0]) > json_date_to_datetime(
                        batches_latest
                    ):
                        batches_latest = result[0]

                    contact_obj = contacts_map.get(result[2], None)
                    self._process_run_poll_results(
                        org,
                        poll.flow_uuid,
                        questions_uuids,
                        result,
                        contact_obj,
                        poll_results_map,
                        poll_results_to_save_map,
                        stats_dict,
                    )

                stats_dict["num_synced"] += len(results)
                if progress_callback:
                    progress_callback(stats_dict["num_synced"])

                self._save_new_poll_results_to_database(poll_results_to_save_map)

                logger.info(
                    "Processed fetch of %d - %d "
                    "runs for poll #%d on org #%d"
                    % (stats_dict["num_synced"] - len(results), stats_dict["num_synced"], poll.pk, org.pk)
                )
                # fetch_start = time.time()
                logger.info("=" * 40)

                if stats_dict["num_synced"] >= Poll.POLL_RESULTS_MAX_SYNC_RUNS or time.time() > lock_expiration:
                    # hit the per-run cap or the lock is about to expire:
                    # persist a cursor so the next run can resume from here
                    poll.rebuild_poll_results_counts()

                    cursor = result[1]
                    self._mark_poll_results_sync_paused(org, poll, cursor, after, before, batches_latest)

                    logger.info(
                        "Break pull results for poll #%d on org #%d in %ds, "
                        " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"
                        " Objects: created %d, updated %d, ignored %d. "
                        "Before cursor %s"
                        % (
                            poll.pk,
                            org.pk,
                            time.time() - start,
                            after,
                            before,
                            batches_latest,
                            latest_synced_obj_time,
                            stats_dict["num_val_created"],
                            stats_dict["num_val_updated"],
                            stats_dict["num_val_ignored"],
                            cursor,
                        )
                    )

                    return (
                        stats_dict["num_val_created"],
                        stats_dict["num_val_updated"],
                        stats_dict["num_val_ignored"],
                        stats_dict["num_path_created"],
                        stats_dict["num_path_updated"],
                        stats_dict["num_path_ignored"],
                    )

            # all pages fetched: advance the high-water mark and mark done
            if batches_latest is not None and (
                latest_synced_obj_time is None
                or json_date_to_datetime(latest_synced_obj_time) <= json_date_to_datetime(batches_latest)
            ):
                latest_synced_obj_time = batches_latest

            self._mark_poll_results_sync_completed(poll, org, latest_synced_obj_time)

            # from django.db import connection as db_connection, reset_queries
            # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
            # for q in slowest_queries:
            #     print "=" * 60
            #     print "\n\n\n"
            #     print "%s -- %s" % (q['time'], q['sql'])
            # reset_queries()

            logger.info(
                "Finished pulling results for poll #%d on org #%d runs in %ds, "
                "Times: sync_latest= %s,"
                "Objects: created %d, updated %d, ignored %d"
                % (
                    poll.pk,
                    org.pk,
                    time.time() - start,
                    latest_synced_obj_time,
                    stats_dict["num_val_created"],
                    stats_dict["num_val_updated"],
                    stats_dict["num_val_ignored"],
                )
            )

    return (
        stats_dict["num_val_created"],
        stats_dict["num_val_updated"],
        stats_dict["num_val_ignored"],
        stats_dict["num_path_created"],
        stats_dict["num_path_updated"],
        stats_dict["num_path_ignored"],
    )
def test_kwargs_from_temba(self): temba_contact = TembaContact.create(uuid='C-006', name="Jan", urns=['tel:123'], groups=['G-001', 'G-007'], fields={ 'registration_date': None, 'state': None, 'lga': None, 'occupation': None, 'born': None, 'gender': None }, language='eng') kwargs = Contact.kwargs_from_temba(self.nigeria, temba_contact) self.assertEqual( kwargs, dict(uuid='C-006', org=self.nigeria, gender='', born=0, occupation='', registered_on=None, state='', district='', ward='')) # try creating contact from them Contact.objects.create(**kwargs) # Invalid boundaries become '' temba_contact = TembaContact.create(uuid='C-007', name="Jan", urns=['tel:123'], groups=['G-001', 'G-007'], fields={ 'registration_date': '2014-01-02T03:04:05.000000Z', 'state': 'Kigali', 'lga': 'Oyo', 'occupation': 'Student', 'born': '1990', 'gender': 'Male' }, language='eng') kwargs = Contact.kwargs_from_temba(self.nigeria, temba_contact) self.assertEqual( kwargs, dict( uuid='C-007', org=self.nigeria, gender='M', born=1990, occupation='Student', registered_on=json_date_to_datetime('2014-01-02T03:04:05.000'), state='', district='', ward='')) # try creating contact from them Contact.objects.create(**kwargs) temba_contact = TembaContact.create(uuid='C-008', name="Jan", urns=['tel:123'], groups=['G-001', 'G-007'], fields={ 'registration_date': '2014-01-02T03:04:05.000000Z', 'state': 'Lagos', 'lga': 'Oyo', 'ward': 'Ikeja', 'occupation': 'Student', 'born': '1990', 'gender': 'Male' }, language='eng') kwargs = Contact.kwargs_from_temba(self.nigeria, temba_contact) self.assertEqual( kwargs, dict( uuid='C-008', org=self.nigeria, gender='M', born=1990, occupation='Student', registered_on=json_date_to_datetime('2014-01-02T03:04:05.000'), state='R-LAGOS', district='R-OYO', ward='R-IKEJA')) # try creating contact from them Contact.objects.create(**kwargs)
def kwargs_from_temba(cls, org, temba_contact):
    """
    Build the kwargs needed to create/update a local Contact from a remote
    TembaContact.

    Org-configured labels map contact fields to state/district/ward
    boundaries (resolved to Boundary osm_ids), registration date,
    occupation, born year and gender.

    Returns a dict suitable for Contact.objects.create(**kwargs).
    """
    from ureport.utils import json_date_to_datetime

    state = ''
    district = ''
    ward = ''
    state_field = org.get_config('state_label')
    if state_field:
        if org.get_config('is_global'):
            # global orgs keep the raw value; no boundary resolution (and no
            # district/ward lookup) happens for them
            state_name = temba_contact.fields.get(cls.find_contact_field_key(org, state_field), None)
            if state_name:
                state = state_name
        else:
            state_name = temba_contact.fields.get(cls.find_contact_field_key(org, state_field), None)
            state_boundary = Boundary.objects.filter(org=org, level=Boundary.STATE_LEVEL,
                                                     name__iexact=state_name).first()
            if state_boundary:
                state = state_boundary.osm_id

            # district must be a child of the matched state boundary
            district_field = org.get_config('district_label')
            if district_field:
                district_name = temba_contact.fields.get(cls.find_contact_field_key(org, district_field), None)
                district_boundary = Boundary.objects.filter(org=org, level=Boundary.DISTRICT_LEVEL,
                                                            name__iexact=district_name,
                                                            parent=state_boundary).first()
                if district_boundary:
                    district = district_boundary.osm_id

                # ward must be a child of the matched district boundary
                ward_field = org.get_config('ward_label')
                if ward_field:
                    ward_name = temba_contact.fields.get(cls.find_contact_field_key(org, ward_field), None)
                    ward_boundary = Boundary.objects.filter(org=org, level=Boundary.WARD_LEVEL,
                                                            name__iexact=ward_name,
                                                            parent=district_boundary).first()
                    if ward_boundary:
                        ward = ward_boundary.osm_id

    registered_on = None
    registration_field = org.get_config('registration_label')
    if registration_field:
        registered_on = temba_contact.fields.get(cls.find_contact_field_key(org, registration_field), None)
        if registered_on:
            registered_on = json_date_to_datetime(registered_on)

    occupation = ''
    occupation_field = org.get_config('occupation_label')
    if occupation_field:
        occupation = temba_contact.fields.get(cls.find_contact_field_key(org, occupation_field), '')
        if not occupation:
            # normalize None/falsy field values back to empty string
            occupation = ''

    born = 0
    born_field = org.get_config('born_label')
    if born_field:
        try:
            born = int(temba_contact.fields.get(cls.find_contact_field_key(org, born_field), 0))

            # support only positive django integer field valid values, as the
            # local_kwargs syncers do; out-of-range years fall back to 0
            if born < 0 or born > 2147483647:
                born = 0

        except ValueError:
            pass  # non-numeric value, keep default 0
        except TypeError:
            pass  # field value was None, keep default 0

    gender = ''
    gender_field = org.get_config('gender_label')
    female_label = org.get_config('female_label')
    male_label = org.get_config('male_label')

    if gender_field:
        gender = temba_contact.fields.get(cls.find_contact_field_key(org, gender_field), '')
        # only the org-configured male/female labels are recognized; any
        # other value is stored as unknown ('')
        if gender and gender.lower() == female_label.lower():
            gender = cls.FEMALE
        elif gender and gender.lower() == male_label.lower():
            gender = cls.MALE
        else:
            gender = ''

    return dict(org=org, uuid=temba_contact.uuid, gender=gender, born=born, occupation=occupation,
                registered_on=registered_on, ward=ward, district=district, state=state)
def test_local_kwargs(self):
    """local_kwargs returns None for non-reporters, else the mapped kwargs dict."""
    # contact not in the 'ureporters' group -> not synced locally
    temba_contact = TembaContact.create(
        uuid='C-006', name="Jan", urns=['tel:123'],
        groups=[ObjectRef.create(uuid='G-001', name='Musicians'),
                ObjectRef.create(uuid='G-007', name='Actors')],
        fields={'registration_date': None, 'state': None, 'lga': None, 'occupation': None, 'born': None,
                'gender': None},
        language='eng')

    self.assertIsNone(self.syncer.local_kwargs(self.nigeria, temba_contact))

    # reporter with all fields empty -> default values
    # NOTE(review): expected dicts in this test omit a 'ward' key — confirm
    # this matches a local_kwargs version that does not return 'ward'
    temba_contact = TembaContact.create(
        uuid='C-006', name="Jan", urns=['tel:123'],
        groups=[ObjectRef.create(uuid='G-001', name='ureporters'),
                ObjectRef.create(uuid='G-007', name='Actors')],
        fields={'registration_date': None, 'state': None, 'lga': None, 'occupation': None, 'born': None,
                'gender': None},
        language='eng')

    self.assertEqual(self.syncer.local_kwargs(self.nigeria, temba_contact),
                     {'org': self.nigeria, 'uuid': 'C-006', 'gender': '', 'born': 0, 'occupation': '',
                      'registered_on': None, 'state': '', 'district': ''})

    # invalid state name -> state and district stay ''
    temba_contact = TembaContact.create(
        uuid='C-007', name="Jan", urns=['tel:123'],
        groups=[ObjectRef.create(uuid='G-001', name='ureporters'),
                ObjectRef.create(uuid='G-007', name='Actors')],
        fields={'registration_date': '2014-01-02T03:04:05.000000Z', 'state': 'Kigali', 'lga': 'Oyo',
                'occupation': 'Student', 'born': '1990', 'gender': 'Male'},
        language='eng')

    self.assertEqual(self.syncer.local_kwargs(self.nigeria, temba_contact),
                     {'org': self.nigeria, 'uuid': 'C-007', 'gender': 'M', 'born': 1990,
                      'occupation': 'Student',
                      'registered_on': json_date_to_datetime('2014-01-02T03:04:05.000'), 'state': '',
                      'district': ''})

    # valid state and district names resolve to their osm_ids
    temba_contact = TembaContact.create(
        uuid='C-008', name="Jan", urns=['tel:123'],
        groups=[ObjectRef.create(uuid='G-001', name='ureporters'),
                ObjectRef.create(uuid='G-007', name='Actors')],
        fields={'registration_date': '2014-01-02T03:04:05.000000Z', 'state': 'Lagos', 'lga': 'Oyo',
                'occupation': 'Student', 'born': '1990', 'gender': 'Male'},
        language='eng')

    self.assertEqual(self.syncer.local_kwargs(self.nigeria, temba_contact),
                     {'org': self.nigeria, 'uuid': 'C-008', 'gender': 'M', 'born': 1990,
                      'occupation': 'Student',
                      'registered_on': json_date_to_datetime('2014-01-02T03:04:05.000'), 'state': 'R-LAGOS',
                      'district': 'R-OYO'})

    # negative born year is rejected -> 0
    temba_contact = TembaContact.create(
        uuid='C-008', name="Jan", urns=['tel:123'],
        groups=[ObjectRef.create(uuid='G-001', name='ureporters'),
                ObjectRef.create(uuid='G-007', name='Actors')],
        fields={'registration_date': '2014-01-02T03:04:05.000000Z', 'state': 'Lagos', 'lga': 'Oyo',
                'occupation': 'Student', 'born': '-1', 'gender': 'Male'},
        language='eng')

    self.assertEqual(self.syncer.local_kwargs(self.nigeria, temba_contact),
                     {'org': self.nigeria, 'uuid': 'C-008', 'gender': 'M', 'born': 0,
                      'occupation': 'Student',
                      'registered_on': json_date_to_datetime('2014-01-02T03:04:05.000'), 'state': 'R-LAGOS',
                      'district': 'R-OYO'})

    # born year above the positive integer field range is rejected -> 0
    temba_contact = TembaContact.create(
        uuid='C-008', name="Jan", urns=['tel:123'],
        groups=[ObjectRef.create(uuid='G-001', name='ureporters'),
                ObjectRef.create(uuid='G-007', name='Actors')],
        fields={'registration_date': '2014-01-02T03:04:05.000000Z', 'state': 'Lagos', 'lga': 'Oyo',
                'occupation': 'Student', 'born': '2147483648', 'gender': 'Male'},
        language='eng')

    self.assertEqual(self.syncer.local_kwargs(self.nigeria, temba_contact),
                     {'org': self.nigeria, 'uuid': 'C-008', 'gender': 'M', 'born': 0,
                      'occupation': 'Student',
                      'registered_on': json_date_to_datetime('2014-01-02T03:04:05.000'), 'state': 'R-LAGOS',
                      'district': 'R-OYO'})
def local_kwargs(self, org, remote):
    """
    Map a remote TembaContact to local Contact kwargs; returns None when the
    contact is not a member of the org's configured reporter group.

    Field labels are resolved through the backend's contact-field map and
    boundary names matched (lowercased) against cached name -> osm_id maps.
    """
    from ureport.utils import json_date_to_datetime

    reporter_group = org.get_config('reporter_group')
    contact_groups_names = [group.name.lower() for group in remote.groups]

    # non-reporters are not synced
    if not reporter_group.lower() in contact_groups_names:
        return None

    org_state_boundaries_data, org_district_boundaries_data, org_ward_boundaries_data = self.get_boundaries_data(
        org)

    contact_fields = self.get_contact_fields(org)

    state = ''
    district = ''
    ward = ''
    state_field = org.get_config('state_label')
    if state_field:
        state_field = state_field.lower()
        if org.get_config('is_global'):
            # global orgs store the raw field value as the state
            state_name = remote.fields.get(contact_fields.get(state_field), None)
            if state_name:
                state = state_name
        else:
            state_name = remote.fields.get(contact_fields.get(state_field), None)
            if state_name:
                state_name = state_name.lower()
                state = org_state_boundaries_data.get(state_name, '')

            district_field = org.get_config('district_label')
            if district_field:
                district_field = district_field.lower()
                district_name = remote.fields.get(
                    contact_fields.get(district_field), None)
                if district_name:
                    district_name = district_name.lower()
                    # district is resolved within the matched state
                    district = org_district_boundaries_data.get(
                        state, dict()).get(district_name, '')

                ward_field = org.get_config('ward_label')
                if ward_field:
                    ward_field = ward_field.lower()
                    ward_name = remote.fields.get(
                        contact_fields.get(ward_field), None)
                    if ward_name:
                        ward_name = ward_name.lower()
                        # ward is resolved within the matched district
                        ward = org_ward_boundaries_data.get(district, dict()).get(
                            ward_name, '')

    registered_on = None
    registration_field = org.get_config('registration_label')
    if registration_field:
        registration_field = registration_field.lower()
        registered_on = remote.fields.get(
            contact_fields.get(registration_field), None)
        if registered_on:
            registered_on = json_date_to_datetime(registered_on)

    occupation = ''
    occupation_field = org.get_config('occupation_label')
    if occupation_field:
        occupation_field = occupation_field.lower()
        occupation = remote.fields.get(
            contact_fields.get(occupation_field), '')
        if not occupation:
            # normalize None/falsy field values back to empty string
            occupation = ''

    born = 0
    born_field = org.get_config('born_label')
    if born_field:
        born_field = born_field.lower()
        try:
            born = int(remote.fields.get(contact_fields.get(born_field), 0))

            # support only positive django integer field valid values
            if born < 0 or born > 2147483647:
                born = 0

        except ValueError:
            pass  # non-numeric value, keep default 0
        except TypeError:
            pass  # field value was None, keep default 0

    gender = ''
    gender_field = org.get_config('gender_label')
    female_label = org.get_config('female_label')
    male_label = org.get_config('male_label')

    if gender_field:
        gender_field = gender_field.lower()
        gender = remote.fields.get(contact_fields.get(gender_field), '')
        # only the org-configured male/female labels are recognized
        if gender and gender.lower() == female_label.lower():
            gender = self.model.FEMALE
        elif gender and gender.lower() == male_label.lower():
            gender = self.model.MALE
        else:
            gender = ''

    return {
        'org': org,
        'uuid': remote.uuid,
        'gender': gender,
        'born': born,
        'occupation': occupation,
        'registered_on': registered_on,
        'state': state,
        'district': district,
        'ward': ward
    }
def test_contact_ward_field(self): temba_contact = TembaContact.create(uuid='C-0011', name="Jan", urns=['tel:123'], groups=['G-001', 'G-007'], fields={ 'registration_date': '2014-01-02T03:04:05.000000Z', 'state': 'Lagos', 'lga': '', 'ward': 'Ikeja', 'occupation': 'Student', 'born': '1990', 'gender': 'Male' }, language='eng') kwargs = Contact.kwargs_from_temba(self.nigeria, temba_contact) # invalid parent boundary (district) will yield empty ward self.assertEqual( kwargs, dict( uuid='C-0011', org=self.nigeria, gender='M', born=1990, occupation='Student', registered_on=json_date_to_datetime('2014-01-02T03:04:05.000'), state='R-LAGOS', district='', ward='')) self.assertEqual(ReportersCounter.get_counts(self.nigeria), dict()) Contact.objects.create( uuid='C-007', org=self.nigeria, gender='M', born=1990, occupation='Student', registered_on=json_date_to_datetime('2014-01-02T03:04:05.000'), state='R-LAGOS', district='R-OYO', ward='R-IKEJA') field_count = ReportersCounter.get_counts(self.nigeria) self.assertEqual(field_count['ward:R-IKEJA'], 1) Contact.objects.create( uuid='C-008', org=self.nigeria, gender='M', born=1980, occupation='Teacher', registered_on=json_date_to_datetime('2014-01-02T03:07:05.000'), state='R-LAGOS', district='R-OYO', ward='R-IKEJA') field_count = ReportersCounter.get_counts(self.nigeria) self.assertEqual(field_count['ward:R-IKEJA'], 2) Contact.objects.all().delete()