def populate_contacts_backend(apps, schema_editor):
    Contact = apps.get_model('contacts', 'Contact')
    ContactField = apps.get_model('contacts', 'ContactField')
    Org = apps.get_model('orgs', 'Org')

    start = time.time()

    for org in Org.objects.all():
        backend = org.backends.filter(slug="rapidpro").first()

        # Contact fields can be moved to the backend in a single query per org
        ContactField.objects.filter(org=org).update(backend=backend)
        print("Processed fields")

        # Contacts are updated in batches of 1000 to keep each UPDATE small
        i = 0
        contact_ids = Contact.objects.filter(org_id=org.id).values_list('id', flat=True)
        for batch in chunk_list(contact_ids, 1000):
            updated = Contact.objects.filter(id__in=batch, org_id=org.id).update(backend_id=backend.id)
            i += updated
            print("Processed %d / %d contacts in %ds" % (i, len(contact_ids), time.time() - start))

        print("Finished setting contacts and fields backend for org %s" % org.name)
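# All of the batched queries in this file lean on chunk_list from ureport.utils.
# Below is a minimal sketch of such a helper, assuming it only needs to slice an
# arbitrary iterable into lists of at most `size` items; the real implementation
# in ureport.utils may differ.
def chunk_list(iterable, size):
    """Yield successive lists of up to `size` items from `iterable`."""
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk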
def delete_poll_results(self):
    from ureport.utils import chunk_list

    results_ids = PollResult.objects.filter(org_id=self.org_id, flow=self.flow_uuid).values_list('pk', flat=True)
    results_ids_count = len(results_ids)

    # Delete the results in batches of 1000 to keep each DELETE query small
    for batch in chunk_list(results_ids, 1000):
        PollResult.objects.filter(pk__in=batch).delete()

    print("Deleted %d poll results for poll #%d on org #%d" % (results_ids_count, self.pk, self.org_id))

    # Clear the pull-results cache keys so the next sync starts from scratch
    cache.delete(Poll.POLL_PULL_ALL_RESULTS_AFTER_DELETE_FLAG % (self.org_id, self.pk))
    cache.delete(Poll.POLL_RESULTS_CURSOR_AFTER_CACHE_KEY % (self.org.pk, self.flow_uuid))
    cache.delete(Poll.POLL_RESULTS_CURSOR_BEFORE_CACHE_KEY % (self.org.pk, self.flow_uuid))
    cache.delete(Poll.POLL_RESULTS_BATCHES_LATEST_CACHE_KEY % (self.org.pk, self.flow_uuid))
    cache.delete(Poll.POLL_RESULTS_LAST_PULL_CACHE_KEY % (self.org.pk, self.flow_uuid))
    cache.delete(Poll.POLL_RESULTS_LAST_PULL_CURSOR % (self.org.pk, self.flow_uuid))
def rebuild_poll_results_counts(self):
    from ureport.utils import chunk_list
    import time

    start = time.time()

    poll_id = self.pk
    org_id = self.org_id
    flow = self.flow_uuid

    r = get_redis_connection()
    key = Poll.POLL_REBUILD_COUNTS_LOCK % (org_id, flow)

    if r.get(key):
        print("Already rebuilding counts for poll #%d on org #%d" % (poll_id, org_id))
    else:
        with r.lock(key):
            poll_results_ids = PollResult.objects.filter(org_id=org_id, flow=flow).values_list('pk', flat=True)
            poll_results_ids_count = len(poll_results_ids)

            print("Results query time for pair %s, %s took %ds" % (org_id, flow, time.time() - start))

            processed_results = 0
            counters_dict = defaultdict(int)

            # Aggregate the counters in memory, batch by batch
            for batch in chunk_list(poll_results_ids, 1000):
                poll_results = list(PollResult.objects.filter(pk__in=batch))

                for result in poll_results:
                    gen_counters = result.generate_counters()
                    for dict_key in gen_counters.keys():
                        counters_dict[(result.org_id, result.ruleset, dict_key)] += gen_counters[dict_key]
                    processed_results += 1

                print("Rebuild counts progress... build counters dict for pair %s, %s, processed %d of %d in %ds"
                      % (org_id, flow, processed_results, poll_results_ids_count, time.time() - start))

            counters_to_insert = []
            for counter_tuple in counters_dict.keys():
                org_id, ruleset, counter_type = counter_tuple
                count = counters_dict[counter_tuple]
                counters_to_insert.append(PollResultsCounter(org_id=org_id, ruleset=ruleset, type=counter_type, count=count))

            # Delete existing counters and then create new counters
            self.delete_poll_results_counter()
            PollResultsCounter.objects.bulk_create(counters_to_insert)

            print("Finished Rebuilding the counters for poll #%d on org #%d in %ds, inserted %d counters objects for %s results"
                  % (poll_id, org_id, time.time() - start, len(counters_to_insert), poll_results_ids_count))

            start_update_cache = time.time()
            self.update_questions_results_cache()
            print("Calculated questions results and updated the cache for poll #%d on org #%d in %ds"
                  % (poll_id, org_id, time.time() - start_update_cache))

            print("Poll responses counts for poll #%d on org #%d are %s responded out of %s polled"
                  % (poll_id, org_id, self.responded_runs(), self.runs()))
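# The get()-then-lock() guard above leaves a small window in which two workers can
# both decide to rebuild. A sketch of the same guard using redis-py's non-blocking
# acquire; rebuild_counters_body() is a hypothetical stand-in for the batched counting
# above, the import of get_redis_connection from django_redis and the lock timeout are
# assumptions, not ureport's actual code.
from django_redis import get_redis_connection

def rebuild_with_lock(org_id, flow):
    r = get_redis_connection()
    key = Poll.POLL_REBUILD_COUNTS_LOCK % (org_id, flow)

    lock = r.lock(key, timeout=3600)  # auto-expires if the worker dies mid-rebuild
    if not lock.acquire(blocking=False):
        print("Already rebuilding counts for pair %s, %s" % (org_id, flow))
        return
    try:
        rebuild_counters_body(org_id, flow)  # hypothetical: the batched counting shown above
    finally:
        lock.release()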
def _iter_poll_record_runs(self, archive, poll_flow_uuid):
    for record_batch in chunk_list(self._iter_archive_records(archive, poll_flow_uuid), 1000):
        matching = []
        for record in record_batch:
            if record["flow"]["uuid"] == poll_flow_uuid:
                record.update(start=None)
                matching.append(record)
        yield Run.deserialize_list(matching)
def delete_poll_results(self):
    from ureport.utils import chunk_list

    results_ids = PollResult.objects.filter(org_id=self.org_id, flow=self.flow_uuid).values_list('pk', flat=True)
    results_ids_count = len(results_ids)

    for batch in chunk_list(results_ids, 1000):
        PollResult.objects.filter(pk__in=batch).delete()

    print("Deleted %d poll results for poll #%d on org #%d" % (results_ids_count, self.pk, self.org_id))

    cache.delete(Poll.POLL_PULL_ALL_RESULTS_AFTER_DELETE_FLAG % (self.org_id, self.pk))
def delete_poll_results_counter(self):
    from ureport.utils import chunk_list

    rulesets = self.questions.all().values_list('ruleset_uuid', flat=True)

    counters_ids = PollResultsCounter.objects.filter(org_id=self.org_id, ruleset__in=rulesets)
    counters_ids = counters_ids.values_list('pk', flat=True)
    counters_ids_count = len(counters_ids)

    # Delete the counters in batches of 1000 to keep each DELETE query small
    for batch in chunk_list(counters_ids, 1000):
        PollResultsCounter.objects.filter(pk__in=batch).delete()

    print("Deleted %d poll results counters for poll #%d on org #%d" % (counters_ids_count, self.pk, self.org_id))
def populate_age_and_gender_on_poll_results(apps, schema_editor):
    Contact = apps.get_model("contacts", "Contact")
    PollResult = apps.get_model("polls", "PollResult")

    all_contacts = Contact.objects.all().values_list('id', flat=True)

    start = time.time()
    i = 0

    for contact_id_batch in chunk_list(all_contacts, 1000):
        contacts = Contact.objects.filter(id__in=contact_id_batch)
        for contact in contacts:
            i += 1
            # Copy each contact's demographics onto its poll results
            results_ids = PollResult.objects.filter(contact=contact.uuid).values_list('id', flat=True)
            PollResult.objects.filter(id__in=results_ids).update(born=contact.born, gender=contact.gender)

        print("Processed poll results update %d / %d contacts in %ds" % (i, len(all_contacts), time.time() - start))
def populate_boundary_backend(apps, schema_editor):
    Boundary = apps.get_model("locations", "Boundary")
    Org = apps.get_model("orgs", "Org")

    start = time.time()

    for org in Org.objects.all():
        backend = org.backends.filter(slug="rapidpro").first()

        boundaries_ids = Boundary.objects.filter(org=org).values_list("id", flat=True)

        i = 0
        for batch in chunk_list(boundaries_ids, 1000):
            updated = Boundary.objects.filter(id__in=batch).update(backend=backend)
            i += updated
            logger.info("Processed %d / %d boundaries in %ds" % (i, len(boundaries_ids), time.time() - start))

        logger.info("Finished setting boundaries backend for org %s" % org.name)
def populate_boundary_backend(apps, schema_editor):
    Boundary = apps.get_model('locations', 'Boundary')
    Org = apps.get_model('orgs', 'Org')

    start = time.time()

    for org in Org.objects.all():
        backend = org.backends.filter(slug="rapidpro").first()

        boundaries_ids = Boundary.objects.filter(org=org).values_list('id', flat=True)

        i = 0
        for batch in chunk_list(boundaries_ids, 1000):
            updated = Boundary.objects.filter(id__in=batch).update(backend=backend)
            i += updated
            print("Processed %d / %d boundaries in %ds" % (i, len(boundaries_ids), time.time() - start))

        print("Finished setting boundaries backend for org %s" % org.name)
def recalculate_reporters_stats(cls, org):
    # Drop the existing counters for this org before rebuilding them from scratch
    ReportersCounter.objects.filter(org_id=org.id).delete()

    all_contacts = Contact.objects.filter(org=org).values_list("id", flat=True).order_by("id")

    start = time.time()
    i = 0

    all_contacts = list(all_contacts)
    all_contacts_count = len(all_contacts)

    counters_dict = defaultdict(int)

    for contact_id_batch in chunk_list(all_contacts, 1000):
        contact_batch = list(contact_id_batch)
        contacts = Contact.objects.filter(id__in=contact_batch)
        for contact in contacts:
            i += 1
            gen_counters = contact.generate_counters()
            for dict_key in gen_counters.keys():
                counters_dict[(org.id, dict_key)] += gen_counters[dict_key]

    counters_to_insert = []
    for counter_tuple in counters_dict.keys():
        org_id, counter_type = counter_tuple
        count = counters_dict[counter_tuple]
        counters_to_insert.append(ReportersCounter(org_id=org_id, type=counter_type, count=count))

    ReportersCounter.objects.bulk_create(counters_to_insert)

    logger.info(
        "Finished Rebuilding the contacts reporters counters for org #%d in %ds, inserted %d counters objects for %s contacts"
        % (org.id, time.time() - start, len(counters_to_insert), all_contacts_count))

    return counters_dict
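# For illustration only: the exact keys come from Contact.generate_counters() and the
# values below are hypothetical. This shows how the per-contact dicts are collapsed
# into one (org_id, type) -> count entry before being written with bulk_create.
from collections import defaultdict

per_contact = [
    {"total-reporters": 1, "gender:f": 1},   # hypothetical contact 1
    {"total-reporters": 1, "gender:m": 1},   # hypothetical contact 2
    {"total-reporters": 1, "gender:f": 1},   # hypothetical contact 3
]

counters_dict = defaultdict(int)
for gen_counters in per_contact:
    for dict_key, value in gen_counters.items():
        counters_dict[(1, dict_key)] += value  # org_id=1

# counters_dict == {(1, 'total-reporters'): 3, (1, 'gender:f'): 2, (1, 'gender:m'): 1}
# Each key/count pair becomes one ReportersCounter(org_id=..., type=..., count=...) row.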