Example #1
0
def trim_webhook_event_task():
    """
    Runs daily and clears any completed webhook events older than settings.SUCCESS_LOGS_TRIM_TIME (default: 48) hours, and all events older than settings.ALL_LOGS_TRIM_TIME hours.
    """

    # keep success messages for only SUCCESS_LOGS_TRIM_TIME hours
    success_logs_trim_time = settings.SUCCESS_LOGS_TRIM_TIME

    # keep all events, including errors, for ALL_LOGS_TRIM_TIME hours
    all_logs_trim_time = settings.ALL_LOGS_TRIM_TIME

    if success_logs_trim_time:
        success_log_later = timezone.now() - timedelta(
            hours=success_logs_trim_time)
        event_ids = WebHookEvent.objects.filter(
            created_on__lte=success_log_later,
            status=WebHookEvent.STATUS_COMPLETE)
        event_ids = event_ids.values_list('id', flat=True)
        for batch in chunk_list(event_ids, 1000):
            WebHookEvent.objects.filter(id__in=batch).delete()

    if all_logs_trim_time:
        all_log_later = timezone.now() - timedelta(hours=all_logs_trim_time)
        event_ids = WebHookEvent.objects.filter(created_on__lte=all_log_later)
        event_ids = event_ids.values_list('id', flat=True)
        for batch in chunk_list(event_ids, 1000):
            WebHookEvent.objects.filter(id__in=batch).delete()
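
Every example below batches work through a chunk_list helper that isn't shown here. A minimal sketch, assuming it simply yields successive lists of up to `size` items from any iterable (lazy querysets included):

from itertools import islice

def chunk_list(iterable, size):
    """Yield successive lists of up to `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            return
        yield chunk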
Example #2
0
def trim_webhook_event_task():
    """
    Runs daily and clears any completed webhook events older than settings.SUCCESS_LOGS_TRIM_TIME (default: 48) hours, and all events older than settings.ALL_LOGS_TRIM_TIME hours.
    """

    # keep success messages for only SUCCESS_LOGS_TRIM_TIME hours
    success_logs_trim_time = settings.SUCCESS_LOGS_TRIM_TIME

    # keep all events, including errors, for ALL_LOGS_TRIM_TIME hours
    all_logs_trim_time = settings.ALL_LOGS_TRIM_TIME

    if success_logs_trim_time:
        success_log_later = timezone.now() - timedelta(hours=success_logs_trim_time)
        event_ids = WebHookEvent.objects.filter(created_on__lte=success_log_later, status=WebHookEvent.STATUS_COMPLETE)
        event_ids = event_ids.values_list("id", flat=True)
        for batch in chunk_list(event_ids, 1000):
            for event in WebHookEvent.objects.filter(id__in=batch):
                event.release()

    if all_logs_trim_time:
        all_log_later = timezone.now() - timedelta(hours=all_logs_trim_time)
        event_ids = WebHookEvent.objects.filter(created_on__lte=all_log_later)
        event_ids = event_ids.values_list("id", flat=True)
        for batch in chunk_list(event_ids, 1000):
            for event in WebHookEvent.objects.filter(id__in=batch):
                event.release()
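
The docstring says this task runs daily; a sketch of how such a task is typically scheduled with celery beat (the schedule name, module path, and time of day are assumptions, and `app` is assumed to be the project's Celery instance):

from celery.schedules import crontab

app.conf.beat_schedule = {
    "trim-webhook-events": {
        "task": "temba.api.tasks.trim_webhook_event_task",  # hypothetical module path
        "schedule": crontab(hour=3, minute=0),  # once a day, off-peak
    },
}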
Example #3
0
def migrate_flows(min_version=None):  # pragma: no cover
    to_version = min_version or get_current_export_version()

    # get all flows below the min version
    old_versions = Flow.get_versions_before(to_version)

    flows_to_migrate = Flow.objects.filter(is_active=True, version_number__in=old_versions)

    flow_ids = list(flows_to_migrate.values_list("id", flat=True))
    total = len(flow_ids)

    if not total:
        print("All flows up to date")
        return True

    print("Found %d flows to migrate to %s..." % (len(flow_ids), to_version))

    num_updated = 0
    errored = []

    for id_batch in chunk_list(flow_ids, 1000):
        for flow in Flow.objects.filter(id__in=id_batch):
            try:
                flow.ensure_current_version(min_version=to_version)
                num_updated += 1
            except Exception:
                print("Unable to migrate flow '%s' (#%d)" % (flow.name, flow.id))
                errored.append(flow)

        print(" > Flows migrated: %d of %d (%d errored)" % (num_updated, total, len(errored)))

    if errored:
        print(" > Errored flows: %s" % (", ".join([str(e.id) for e in errored])))

    return len(errored) == 0
def migrate_flows(min_version=None):  # pragma: no cover
    to_version = min_version or Flow.FINAL_LEGACY_VERSION

    # get all flows below the min version
    old_versions = Flow.get_versions_before(to_version)

    flows_to_migrate = Flow.objects.filter(is_active=True, version_number__in=old_versions)

    flow_ids = list(flows_to_migrate.values_list("id", flat=True))
    total = len(flow_ids)

    if not total:
        print("All flows up to date")
        return True

    print(f"Found {len(flow_ids)} flows to migrate to {to_version}...")

    num_updated = 0
    num_errored = 0

    for id_batch in chunk_list(flow_ids, 1000):
        for flow in Flow.objects.filter(id__in=id_batch):
            try:
                flow.ensure_current_version(min_version=to_version)
                num_updated += 1
            except Exception:
                print(f"Unable to migrate flow '{flow.name}' ({str(flow.uuid)}):")
                print(traceback.format_exc())
                num_errored += 1

        print(f" > Flows migrated: {num_updated} of {total} ({num_errored} errored)")

    return num_errored == 0
def populate_recent_runs(FlowPathRecentMessage, FlowPathRecentRun):
    recent_msgs = FlowPathRecentMessage.objects.order_by("id")

    # don't convert any records that have already been added for new runs
    new_for_run = FlowPathRecentRun.objects.order_by("id").first()
    if new_for_run:
        recent_msgs = recent_msgs.filter(created_on__lt=new_for_run.visited_on)

    recent_msgs_count = recent_msgs.count()
    if not recent_msgs_count:
        return

    num_converted = 0
    for recent_msg_batch in chunk_list(
            recent_msgs.using("direct").iterator(), 1000):
        with transaction.atomic():
            for recent_msg in recent_msg_batch:

                FlowPathRecentRun.objects.create(
                    from_uuid=recent_msg.from_uuid,
                    to_uuid=recent_msg.to_uuid,
                    run=recent_msg.run,
                    visited_on=recent_msg.created_on,
                )

            num_converted += len(recent_msg_batch)

        print(" > Converted %d of %d recent messages to recent runs" %
              (num_converted, recent_msgs_count))
Example #6
0
    def handle(self, file_path: str, batch_size: int, tps: int, *args,
               **options):
        with open(file_path) as id_file:
            msg_ids = sorted(int(line) for line in id_file if line.strip())

        self.stdout.write(f"> loaded {len(msg_ids)} msg ids from {file_path}")

        num_batches = math.ceil(len(msg_ids) / batch_size)
        batch_send_time = int(batch_size /
                              tps)  # estimated time to send a batch in seconds
        batch_num = 0
        next_attempt = timezone.now()

        self.stdout.write(
            f"> estimated batch send time of {batch_send_time} seconds at {tps} TPS"
        )

        for id_batch in chunk_list(msg_ids, batch_size):
            # only fetch messages which are WIRED and have never errored
            batch = Msg.objects.filter(id__in=id_batch,
                                       status=Msg.STATUS_WIRED,
                                       error_count=0)
            num_updated = batch.update(status=Msg.STATUS_ERRORED,
                                       error_count=1,
                                       next_attempt=next_attempt)

            self.stdout.write(
                f"> batch {batch_num+1}/{num_batches}"
                f" - dewired {num_updated} msg ids, next_attempt={next_attempt.isoformat()}"
            )

            batch_num += 1
            next_attempt = next_attempt + timedelta(seconds=batch_send_time)
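
The command's argument declarations aren't shown; a minimal sketch of an add_arguments() that would feed the handle() signature above (the flag names and defaults are assumptions):

    def add_arguments(self, parser):
        parser.add_argument("file_path", type=str, help="file with one msg id per line")
        parser.add_argument("--batch-size", type=int, dest="batch_size", default=1000)
        parser.add_argument("--tps", type=int, default=10, help="channel throughput used to stagger next_attempt")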
def populate_responded_batch(batch_size, Msg, FlowRun):
    # grab ids of a batch of runs with null responded
    run_ids = FlowRun.objects.filter(responded=None)
    run_ids = list(run_ids.values_list('pk', flat=True)[:batch_size])

    if not run_ids:
        return 0, 0

    print "Fetched ids of %d runs with no responded value..." % len(run_ids)

    total_with, total_without = 0, 0

    for batch_ids in chunk_list(run_ids, UPDATE_BATCH_SIZE):
        batch_ids = list(batch_ids)

        # which of the runs in this batch have responses?
        msg_responses = Msg.objects.filter(direction='I', steps__run__pk__in=batch_ids)
        with_responses = msg_responses.values_list('steps__run', flat=True)

        with_responses = set(with_responses)
        without_responses = [run_id for run_id in batch_ids if run_id not in with_responses]

        # update our batches of responded/un-responded
        if with_responses:
            FlowRun.objects.filter(pk__in=with_responses).update(responded=True)
        if without_responses:
            FlowRun.objects.filter(pk__in=without_responses).update(responded=False)

        total_with += len(with_responses)
        total_without += len(without_responses)

        print " > Updated %d of %d runs batch" % (total_with + total_without, len(run_ids))

    return total_with, total_without
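
populate_responded_batch only processes one slice of runs per call; a hypothetical driver loop that keeps calling it until nothing is left to backfill (the function name and counters here are assumptions):

def populate_responded(Msg, FlowRun, batch_size=5000):
    total_with, total_without = 0, 0
    while True:
        num_with, num_without = populate_responded_batch(batch_size, Msg, FlowRun)
        if not num_with and not num_without:
            break  # no more runs with responded=None
        total_with += num_with
        total_without += num_without

    print("Backfilled responded on %d runs (%d with responses, %d without)"
          % (total_with + total_without, total_with, total_without))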
Example #8
0
    def __init__(self, records: List[Dict], max_payload_size: int = 256):
        # serialize records as a JSONL payload
        buffer = io.BytesIO()
        for record in records:
            buffer.write(json.dumps(record).encode("utf-8"))
            buffer.write(b"\n")

        payload = buffer.getvalue()
        payload_chunks = chunk_list(payload, size=max_payload_size)

        self.events = [{"Records": {"Payload": chunk}} for chunk in payload_chunks]
        self.events.append({
            "Stats": {
                "Details": {
                    "BytesScanned": 123,
                    "BytesProcessed": 234,
                    "BytesReturned": len(payload),
                }
            }
        })
        self.events.append({"End": {}})
Example #9
0
    def handle(self, start_id: int, event_types: list, dry_run: bool, quiet: bool, *args, **options):
        start = FlowStart.objects.filter(id=start_id).first()
        if not start:
            raise CommandError("no such flow start")

        undoers = {t: clazz(self.stdout) for t, clazz in UNDO_CLASSES.items() if not event_types or t in event_types}

        undo_types = ", ".join(sorted(undoers.keys())) if undoers else "no"
        desc = f"{undo_types} events for start #{start.id} of '{start.flow}' flow in the '{start.org.name}' workspace"
        if quiet:
            self.stdout.write(f"Undoing {desc}...")
        else:
            if input(f"Undo {desc}? [y/N]: ") != "y":
                return

        self.stdout.write("Fetching run ids... ", ending="")
        run_ids = list(start.runs.values_list("id", flat=True))
        self.stdout.write(f"found {len(run_ids)}")

        num_fixed = 0

        # process runs in batches
        for run_id_batch in chunk_list(run_ids, self.batch_size):
            run_batch = list(FlowRun.objects.filter(id__in=run_id_batch).only("id", "contact_id", "session_id"))

            self.undo_for_batch(run_batch, undoers, dry_run)
            num_fixed += len(run_batch)

            self.stdout.write(f" > Fixed {num_fixed} contacts")

        # print summaries of the undoers
        for undoer in undoers.values():
            undoer.print_summary()
def bulk_exit(runs, exit_type, exited_on=None):
    from temba.flows.models import Flow, FlowRun

    if isinstance(runs, list):
        runs = [{'id': r.pk, 'flow_id': r.flow_id} for r in runs]
    else:
        runs = list(runs.values('id', 'flow_id'))  # select only what we need...

    # organize runs by flow
    runs_by_flow = defaultdict(list)
    for run in runs:
        runs_by_flow[run['flow_id']].append(run['id'])

    # for each flow, remove activity for all runs
    for flow_id, run_ids in runs_by_flow.items():
        flow = Flow.objects.filter(id=flow_id).first()

        if flow:
            flow.remove_active_for_run_ids(run_ids)

    modified_on = timezone.now()
    if not exited_on:
        exited_on = modified_on

    from temba.flows.tasks import continue_parent_flows

    # batch this for 1,000 runs at a time so we don't grab locks for too long
    for batch in chunk_list(runs, 1000):
        ids = [r['id'] for r in batch]
        run_objs = FlowRun.objects.filter(pk__in=ids)
        run_objs.update(is_active=False, exited_on=exited_on, exit_type=exit_type, modified_on=modified_on)

        # continue the parent flows to continue async
        continue_parent_flows.delay(ids)
Example #11
0
def migrate_flow_activity(Flow, FlowPathCount, FlowPathRecentMessage):
    """
    Converts old path count and recent message records (rule_uuid/node_uuid -> node_uuid) to be (exit_uuid -> node_uuid)
    """
    # start by ensuring all flows are at a minimum version (the one that added exit_uuid to actionsets)
    if not migrate_flows('10.4'):
        raise ValueError("Migration can't proceed because some flows couldn't be migrated")

    flow_ids = list(Flow.objects.filter(is_active=True).values_list('id', flat=True))
    if not flow_ids:
        return

    print("Found %d active flows to migrate activity for..." % len(flow_ids))

    num_updated = 0
    for id_batch in chunk_list(flow_ids, 1000):
        flows = Flow.objects.filter(id__in=id_batch).prefetch_related('action_sets')

        for flow in flows:
            with transaction.atomic():
                for action_set in flow.action_sets.all():
                    FlowPathCount.objects.filter(flow=flow, from_uuid=action_set.uuid).update(from_uuid=action_set.exit_uuid)
                    FlowPathRecentMessage.objects.filter(from_uuid=action_set.uuid).update(from_uuid=action_set.exit_uuid)

            num_updated += 1

        print(" > Updated %d of %d flows" % (num_updated, len(flow_ids)))
def apply_as_migration(apps, schema_editor):
    Flow = apps.get_model("flows", "Flow")
    FlowRun = apps.get_model("flows", "FlowRun")
    FlowStep = apps.get_model("flows", "FlowStep")

    flows = Flow.objects.filter(is_archived=True)
    for flow in flows:
        runs = FlowRun.objects.filter(is_active=True,
                                      exit_type=None,
                                      flow_id=flow.id)
        run_ids = list(runs.values_list("id", flat=True))

        # batch this for 1,000 runs at a time so we don't grab locks for too long
        for id_batch in chunk_list(run_ids, 1000):
            now = timezone.now()

            # mark all steps in these runs as having been left
            FlowStep.objects.filter(run__id__in=id_batch,
                                    left_on=None).update(left_on=now)

            runs = FlowRun.objects.filter(id__in=id_batch)
            runs.update(is_active=False,
                        exited_on=now,
                        exit_type="I",
                        modified_on=now)
Example #13
0
def migrate_flows():  # pragma: no cover
    flows_to_migrate = (Flow.objects.filter(is_active=True).exclude(
        version_number=Flow.FINAL_LEGACY_VERSION).exclude(
            version_number=Flow.CURRENT_SPEC_VERSION))

    flow_ids = list(flows_to_migrate.values_list("id", flat=True))
    total = len(flow_ids)

    if not total:
        print("All flows up to date")
        return True

    print(f"Found {len(flow_ids)} flows to migrate...")

    num_updated = 0
    num_errored = 0

    for id_batch in chunk_list(flow_ids, 5000):
        for flow in Flow.objects.filter(id__in=id_batch):
            try:
                flow.ensure_current_version()
                num_updated += 1
            except Exception:
                print(
                    f"Unable to migrate flow[uuid={str(flow.uuid)} name={flow.name} created_on={flow.created_on.isoformat()}]':"
                )
                print(traceback.format_exc())
                num_errored += 1

        print(
            f" > Flows migrated: {num_updated} of {total} ({num_errored} errored)"
        )

    return num_errored == 0
def fix_flow_types(apps, schema_editor):
    Flow = apps.get_model("flows", "Flow")

    num_updated = 0
    for batch in chunk_list(Flow.objects.filter(flow_type="F"), 1000):
        Flow.objects.filter(id__in=[f.id for f in batch]).update(flow_type="M")
        num_updated += len(batch)
        print(f" > Updated {num_updated} flows with type F to type M")
Example #15
0
 def trim(cls):
     """
     Deletes all HTTP Logs older than 3 days, 1000 at a time
     """
     cutoff = timezone.now() - timedelta(days=3)
     ids = HTTPLog.objects.filter(created_on__lte=cutoff).values_list("id", flat=True)
     for chunk in chunk_list(ids, 1000):
         HTTPLog.objects.filter(id__in=chunk).delete()
Example #16
0
def refresh_whatsapp_contacts(channel_id):
    r = get_redis_connection()
    key = "refresh_whatsapp_contacts_%d" % channel_id

    # we can't use our non-overlapping task decorator as it creates a loop in the celery resolver when registering
    if r.get(key):  # pragma: no cover
        return

    channel = Channel.objects.filter(id=channel_id, is_active=True).first()
    if not channel:  # pragma: no cover
        return

    with r.lock(key, 3600):
        # look up all whatsapp URNs for this channel
        wa_urns = (ContactURN.objects.filter(
            org_id=channel.org_id,
            scheme=WHATSAPP_SCHEME,
            contact__is_stopped=False,
            contact__is_blocked=False).exclude(contact=None).only(
                "id", "path"))

        # 1,000 contacts at a time, we ask WhatsApp to look up our contacts based on the path
        refreshed = 0

        for urn_batch in chunk_list(wa_urns, 1000):
            # need to wait 10 seconds between each batch of 1000
            if refreshed > 0:  # pragma: no cover
                time.sleep(10)

            # build a list of the fully qualified numbers we have
            contacts = ["+%s" % u.path for u in urn_batch]
            payload = {"blocking": "wait", "contacts": contacts}

            # go fetch our contacts
            headers = {
                "Authorization":
                "Bearer %s" % channel.config[Channel.CONFIG_AUTH_TOKEN]
            }
            url = channel.config[Channel.CONFIG_BASE_URL] + "/v1/contacts"

            start = timezone.now()
            resp = requests.post(url, json=payload, headers=headers)
            elapsed = (timezone.now() - start).total_seconds() * 1000

            HTTPLog.create_from_response(HTTPLog.WHATSAPP_CONTACTS_REFRESHED,
                                         url,
                                         resp,
                                         channel=channel,
                                         request_time=elapsed)

            # if we had an error, break out
            if resp.status_code != 200:
                break

            refreshed += len(urn_batch)

        print("refreshed %d whatsapp urns for channel %d" %
              (refreshed, channel_id))
Example #17
0
def trim_channel_log_task():
    """
    Trims old channel logs
    """

    trim_before = timezone.now() - settings.RETENTION_PERIODS["channellog"]

    ids = ChannelLog.objects.filter(created_on__lte=trim_before).values_list("id", flat=True)
    for chunk in chunk_list(ids, 1000):
        ChannelLog.objects.filter(id__in=chunk).delete()
def populate_flowsteps_for_broadcast(RelatedBroadcast, RelatedMsg, MsgManager, broadcast, batch):
    msg_ids = MsgManager.filter(broadcast=broadcast.id).values_list('id', flat=True)
    start_count = len(batch)

    for msg_id_batch in chunk_list(set(msg_ids), 1000):
        fs_ids = set(RelatedMsg.objects.filter(msg_id__in=msg_id_batch).values_list('flowstep_id', flat=True))
        broadcast_batch = [RelatedBroadcast(flowstep_id=fs_id, broadcast_id=broadcast.id) for fs_id in fs_ids]
        batch += broadcast_batch

    return len(batch) - start_count
Example #19
0
def clear_next_attempt(apps, schema_editor):  # pragma: no cover
    Msg = apps.get_model("msgs", "Msg")
    Channel = apps.get_model("channels", "Channel")

    android_ids = Channel.objects.filter(channel_type="A").values_list(
        "id", flat=True)

    for android_chunk in chunk_list(android_ids, 100):
        Msg.objects.filter(channel_id__in=android_chunk).update(
            next_attempt=None)
Example #20
0
def trim_event_fires_task():
    start = timezone.now()
    boundary = timezone.now() - timedelta(days=settings.EVENT_FIRE_TRIM_DAYS)
    trim_ids = EventFire.objects.filter(fired__lt=boundary).values_list(
        "id", flat=True).order_by("fired")[:100000]
    for batch in chunk_list(trim_ids, 100):
        # use a bulk delete for performance reasons, nothing references EventFire
        EventFire.objects.filter(id__in=batch).delete()

    print(f"Deleted {len(trim_ids)} event fires in {timezone.now()-start}")
Example #21
0
def do_populate_send_all(Broadcast):
    broadcast_ids = Broadcast.objects.all().values_list('id', flat=True)

    broadcast_count = len(broadcast_ids)
    if broadcast_count:
        print("Starting to update send_all field on %d broadcasts..." % broadcast_count)

    updated = 0
    for chunk in chunk_list(broadcast_ids, 5000):
        Broadcast.objects.filter(pk__in=chunk).update(send_all=False)
        updated += len(chunk)
        print("Updated %d of %d broadcasts" % (updated, broadcast_count))
def populate_is_system(apps, schema_editor):
    Flow = apps.get_model("flows", "Flow")

    total = Flow.objects.filter(is_system=None).count()
    if total:
        print(f"Updating is_system on {total} flows...")

    num_updated = 0

    for batch in chunk_list(Flow.objects.filter(is_system=None).exclude(flow_type="M"), 1000):
        Flow.objects.filter(id__in=[f.id for f in batch]).update(is_system=False)

        num_updated += len(batch)
        print(f" > Updated {num_updated} of {total} flows")

    for batch in chunk_list(Flow.objects.filter(is_system=None).filter(flow_type="M"), 1000):
        Flow.objects.filter(id__in=[f.id for f in batch]).update(is_system=True)

        num_updated += len(batch)
        print(f" > Updated {num_updated} of {total} flows")
Example #23
0
def release_contacts(user_id, contact_ids):
    """
    Releases the given contacts
    """
    user = User.objects.get(pk=user_id)

    for id_batch in chunk_list(contact_ids, 100):
        batch = Contact.objects.filter(id__in=id_batch,
                                       is_active=True).prefetch_related("urns")
        for contact in batch:
            contact.release(user)
def backfill_flowsteps(FlowStep, Broadcast, MsgManager):
    # we keep track of our completed broadcasts so we can pick up where we left off if interrupted
    r = get_redis_connection()
    highpoint = r.get(HIGHPOINT_KEY)
    highpoint = int(highpoint) if highpoint is not None else 0

    RelatedBroadcast = FlowStep.broadcasts.through
    RelatedMsg = FlowStep.messages.through

    broadcast_ids = Broadcast.objects.filter(
        id__gt=highpoint).order_by('id').values_list('id', flat=True)
    start = time.time()
    batch = []
    i = 0

    for broadcast_id_batch in chunk_list(broadcast_ids, 1000):
        broadcasts = Broadcast.objects.filter(
            id__in=broadcast_id_batch).order_by('id').only('id')
        for broadcast in broadcasts:
            i += 1

            # clear any current relations on this broadcast
            RelatedBroadcast.objects.filter(broadcast_id=broadcast.id).delete()

            populate_flowsteps_for_broadcast(RelatedBroadcast, RelatedMsg,
                                             MsgManager, broadcast, batch)
            if len(batch) > 1000:
                for broadcast_batch in chunk_list(batch, 1000):
                    RelatedBroadcast.objects.bulk_create(broadcast_batch)
                r.set(HIGHPOINT_KEY, broadcast.id)
                batch = []

        print "Processed %d / %d (batch size %d) in %d" % (
            i, len(broadcast_ids), len(batch), int(time.time() - start))

    for broadcast_batch in chunk_list(batch, 1000):
        RelatedBroadcast.objects.bulk_create(broadcast_batch)

    # we finished, no need to track any more status
    r.delete(HIGHPOINT_KEY)
def backfill_urn_identity(apps, schema_editor):
    ContactURN = apps.get_model('contacts', 'ContactURN')

    urns = ContactURN.objects.filter(identity=None).values_list('id', flat=True)
    count = 0

    print("found %d urns to backfill" % len(urns))

    for batch in chunk_list(urns, 1000):
        ContactURN.objects.filter(id__in=batch).update(identity=F('urn'))
        count += len(batch)
        print("backfilled %d of %d URNs" % (count, len(urns)))
Example #26
0
def trim_webhook_event_task():
    """
    Trims old webhook events
    """

    if settings.RETENTION_PERIODS["webhookevent"]:
        trim_before = timezone.now() - settings.RETENTION_PERIODS["webhookevent"]
        event_ids = WebHookEvent.objects.filter(
            created_on__lte=trim_before).values_list("id", flat=True)
        for batch in chunk_list(event_ids, 1000):
            WebHookEvent.objects.filter(id__in=batch).delete()
def delete_inactive_channelevents(apps, schema_editor):
    ChannelEvent = apps.get_model('channels', 'ChannelEvent')

    # delete all channel events that are inactive, we don't care to keep those around
    ids = ChannelEvent.objects.filter(is_active=False).values_list('id',
                                                                   flat=True)
    print("Found %d channel events to delete" % len(ids))
    count = 0
    for chunk in chunk_list(ids, 1000):
        ChannelEvent.objects.filter(id__in=chunk).delete()
        count += len(chunk)
        print("Deleted %d" % count)
Example #28
0
def refresh_whatsapp_contacts(channel_id):
    r = get_redis_connection()
    key = 'refresh_whatsapp_contacts_%d' % channel_id

    # we can't use our non-overlapping task decorator as it creates a loop in the celery resolver when registering
    if r.get(key):  # pragma: no cover
        return

    channel = Channel.objects.filter(id=channel_id, is_active=True).first()
    if not channel:  # pragma: no cover
        return

    with r.lock(key, 3600):
        # look up all whatsapp URNs for this channel
        wa_urns = (ContactURN.objects.filter(
            org_id=channel.org_id,
            scheme=WHATSAPP_SCHEME,
            contact__is_stopped=False,
            contact__is_blocked=False).exclude(contact=None).only(
                'id', 'path'))

        # 1,000 contacts at a time, we ask WhatsApp to look up our contacts based on the path
        refreshed = 0

        for urn_batch in chunk_list(wa_urns, 1000):
            # need to wait 10 seconds between each batch of 1000
            if refreshed > 0:  # pragma: no cover
                time.sleep(10)

            # build a list of the fully qualified numbers we have
            contacts = ["+%s" % u.path for u in urn_batch]
            payload = {"blocking": "wait", "contacts": contacts}

            # go fetch our contacts
            headers = {
                "Authorization":
                "Bearer %s" % channel.config[Channel.CONFIG_AUTH_TOKEN]
            }
            resp = requests.post(channel.config[Channel.CONFIG_BASE_URL] +
                                 '/v1/contacts',
                                 json=payload,
                                 headers=headers)

            # if we had an error, raise
            if resp.status_code != 200 or resp.json().get('error', True):
                raise Exception("Received error refreshing contacts for %d" % channel.id)

            refreshed += len(urn_batch)

        print("refreshed %d whatsapp urns for channel %d" %
              (refreshed, channel_id))
Example #29
0
def clear_old_msg_external_ids():
    """
    Clears external_id on older messages to reduce the size of the index on that column. External ids aren't surfaced
    anywhere and are only used for debugging channel issues, so are of limited usefulness on older messages.
    """
    threshold = timezone.now() - timedelta(days=30)  # 30 days ago

    msg_ids = list(Msg.objects.filter(created_on__lt=threshold).exclude(external_id=None).values_list('id', flat=True))

    for msg_id_batch in chunk_list(msg_ids, 1000):
        Msg.objects.filter(id__in=msg_id_batch).update(external_id=None)

    print("Cleared external ids on %d messages" % len(msg_ids))
Example #30
0
def migrate_duration_extra(apps, schema_editor):
    ChannelEvent = apps.get_model('channels', 'ChannelEvent')

    # find all events with a duration and convert them to extra
    ids = ChannelEvent.objects.filter(duration__gte=0).values_list('id', flat=True)
    if ids:
        print("Found %d channel events to set extra on" % len(ids))

    count = 0
    for chunk in chunk_list(ids, 250):
        ChannelEvent.objects.filter(id__in=chunk).update(extra=Concat(Value('{"duration":'), F('duration'), Value('}'), output_field=TextField()))
        count += len(chunk)
        print("Updated %d" % count)
Example #31
0
def clear_old_msg_external_ids():
    """
    Clears external_id on older messages to reduce the size of the index on that column. External ids aren't surfaced
    anywhere and are only used for debugging channel issues, so are of limited usefulness on older messages.
    """
    threshold = timezone.now() - timedelta(days=30)  # 30 days ago

    msg_ids = list(Msg.objects.filter(created_on__lt=threshold).exclude(external_id=None).values_list('id', flat=True))

    for msg_id_batch in chunk_list(msg_ids, 1000):
        Msg.objects.filter(pk__in=msg_id_batch).update(external_id=None)

    print("Cleared external ids on %d messages" % len(msg_ids))
Example #32
0
    def derive_opt_outs(apps, schema_editor):
        from temba.contacts.models import Contact, ContactGroup

        # remap our group types to reflect failed becoming stopped
        ContactGroup.system_groups.filter(group_type='F').update(group_type='S')

        # now unstop any contacts that belong to groups, these are temporary failures
        failed_ids = Contact.objects.filter(is_active=True, is_stopped=True, all_groups__group_type='U').distinct().values_list('id', flat=True)
        for chunk_ids in chunk_list(failed_ids, 100):
            contacts = Contact.objects.filter(id__in=chunk_ids)
            for contact in contacts:
                contact.unstop(contact.modified_by)
                print "unstopped: %d" % contact.id
Example #33
0
def update_session_wait_expires(flow_id):
    """
    Update the wait_expires_on of any session currently waiting in the given flow
    """

    flow = Flow.objects.get(id=flow_id)
    session_ids = flow.sessions.filter(
        status=FlowSession.STATUS_WAITING).values_list("id", flat=True)

    for id_batch in chunk_list(session_ids, 1000):
        batch = FlowSession.objects.filter(id__in=id_batch)
        batch.update(wait_expires_on=F("wait_started_on") +
                     timedelta(minutes=flow.expires_after_minutes))
def populate_exit_type_batch(batch_size, FlowRun, FlowStep, ActionSet):
    # grab ids of a batch of inactive runs with no exit type
    exited_run_ids = FlowRun.objects.filter(is_active=False, exit_type=None)
    exited_run_ids = list(exited_run_ids.values_list('pk', flat=True)[:batch_size])

    if not exited_run_ids:
        return 0

    print "Fetched ids of %d potentially expired, completed or stopped runs" % len(exited_run_ids)

    # grab UUIDs of all terminal action sets for quick lookups
    terminal_nodes = set([n['uuid'] for n in ActionSet.objects.filter(destination=None).values('uuid')])
    if terminal_nodes:
        print "Cached %d terminal nodes for run completion calculation" % len(terminal_nodes)

    # pre-fetch required for completion calculation
    steps_prefetch = Prefetch('steps', queryset=FlowStep.objects.order_by('arrived_on'))

    num_updated = 0

    for batch_ids in chunk_list(exited_run_ids, UPDATE_BATCH_SIZE):
        completed_ids = []
        interrupted_ids = []
        expired_ids = []

        for run in FlowRun.objects.filter(pk__in=batch_ids).prefetch_related(steps_prefetch):
            # get last step in this run
            steps = list(run.steps.all())
            last_step = steps[-1] if steps else None

            if last_step and step_is_terminal(last_step, terminal_nodes):
                completed_ids.append(run.pk)
            elif run.exited_on:
                expired_ids.append(run.pk)
            else:
                interrupted_ids.append(run.pk)

        # update our batches of completed/interrupted/expired, using modified_on as approximate exited_on
        if completed_ids:
            FlowRun.objects.filter(pk__in=completed_ids).update(exited_on=F('modified_on'), exit_type='C')
        if interrupted_ids:
            FlowRun.objects.filter(pk__in=interrupted_ids).update(exited_on=F('modified_on'), exit_type='I')
        if expired_ids:
            FlowRun.objects.filter(pk__in=expired_ids).update(exit_type='E')

        num_updated += len(completed_ids) + len(interrupted_ids) + len(expired_ids)

        print " > Updated %d of %d runs" % (num_updated, len(exited_run_ids))

    return len(exited_run_ids)
def backfill_flowsteps(FlowStep, Broadcast, MsgManager):
    # we keep track of our completed broadcasts so we can pick up where we left off if interrupted
    r = get_redis_connection()
    highpoint = r.get(HIGHPOINT_KEY)
    highpoint = int(highpoint) if highpoint is not None else 0

    RelatedBroadcast = FlowStep.broadcasts.through
    RelatedMsg = FlowStep.messages.through

    broadcast_ids = Broadcast.objects.filter(id__gt=highpoint).order_by('id').values_list('id', flat=True)
    start = time.time()
    batch = []
    i = 0

    for broadcast_id_batch in chunk_list(broadcast_ids, 1000):
        broadcasts = Broadcast.objects.filter(id__in=broadcast_id_batch).order_by('id').only('id')
        for broadcast in broadcasts:
            i += 1

            # clear any current relations on this broadcast
            RelatedBroadcast.objects.filter(broadcast_id=broadcast.id).delete()

            populate_flowsteps_for_broadcast(RelatedBroadcast, RelatedMsg, MsgManager, broadcast, batch)
            if len(batch) > 1000:
                for broadcast_batch in chunk_list(batch, 1000):
                    RelatedBroadcast.objects.bulk_create(broadcast_batch)
                r.set(HIGHPOINT_KEY, broadcast.id)
                batch = []

        print "Processed %d / %d (batch size %d) in %d" % (i, len(broadcast_ids), len(batch), int(time.time() - start))

    for broadcast_batch in chunk_list(batch, 1000):
        RelatedBroadcast.objects.bulk_create(broadcast_batch)

    # we finished, no need to track any more status
    r.delete(HIGHPOINT_KEY)
Example #36
0
def populate_is_system(apps, schema_editor):
    Flow = apps.get_model("flows", "Flow")

    total = Flow.objects.filter(is_system=None).count()
    if total:
        print(f"Updating is_system on {total} flows...")

    num_updated = 0

    for batch in chunk_list(
            Flow.objects.filter(is_system=None).exclude(flow_type="M"), 1000):
        Flow.objects.filter(id__in=[f.id
                                    for f in batch]).update(is_system=False)

        num_updated += len(batch)
        print(f" > Updated {num_updated} of {total} flows")

    for batch in chunk_list(
            Flow.objects.filter(is_system=None).filter(flow_type="M"), 1000):
        Flow.objects.filter(id__in=[f.id
                                    for f in batch]).update(is_system=True)

        num_updated += len(batch)
        print(f" > Updated {num_updated} of {total} flows")
Example #37
0
def trim_flow_starts():
    """
    Clean up completed non-user created flow starts
    """
    trim_before = timezone.now() - settings.RETENTION_PERIODS["flowstart"]
    num_deleted = 0
    start = timezone.now()

    logger.info(
        f"Deleting completed non-user created flow starts created before {trim_before.isoformat()}"
    )

    while True:
        start_ids = list(
            FlowStart.objects.filter(
                created_by=None,
                status__in=(FlowStart.STATUS_COMPLETE,
                            FlowStart.STATUS_FAILED),
                modified_on__lte=trim_before,
            ).values_list("id", flat=True)[:1000])
        if not start_ids:
            break

        # detach any flows runs that belong to these starts
        run_ids = FlowRun.objects.filter(start_id__in=start_ids).values_list(
            "id", flat=True)[:100000]
        while len(run_ids) > 0:
            for chunk in chunk_list(run_ids, 1000):
                FlowRun.objects.filter(id__in=chunk).update(start_id=None)

            # reselect for our next batch
            run_ids = FlowRun.objects.filter(
                start_id__in=start_ids).values_list("id", flat=True)[:100000]

        FlowStart.contacts.through.objects.filter(
            flowstart_id__in=start_ids).delete()
        FlowStart.groups.through.objects.filter(
            flowstart_id__in=start_ids).delete()
        FlowStartCount.objects.filter(start_id__in=start_ids).delete()
        FlowStart.objects.filter(id__in=start_ids).delete()
        num_deleted += len(start_ids)

        if num_deleted % 10000 == 0:  # pragma: no cover
            logger.debug(f" > Deleted {num_deleted} flow starts")

    logger.info(
        f"Deleted {num_deleted} completed non-user created flow starts in {timesince(start)}"
    )
    def derive_opt_outs(apps, schema_editor):
        from temba.contacts.models import Contact, ContactGroup

        # remap our group types to reflect failed becoming stopped
        ContactGroup.system_groups.filter(group_type='F').update(
            group_type='S')

        # now unstop any contacts that belong to groups, these are temporary failures
        failed_ids = Contact.objects.filter(
            is_active=True, is_stopped=True,
            all_groups__group_type='U').distinct().values_list('id', flat=True)
        for chunk_ids in chunk_list(failed_ids, 100):
            contacts = Contact.objects.filter(id__in=chunk_ids)
            for contact in contacts:
                contact.unstop(contact.modified_by)
                print "unstopped: %d" % contact.id
Example #39
0
def refresh_whatsapp_contacts(channel_id):
    r = get_redis_connection()
    key = "refresh_whatsapp_contacts_%d" % channel_id

    # we can't use our non-overlapping task decorator as it creates a loop in the celery resolver when registering
    if r.get(key):  # pragma: no cover
        return

    channel = Channel.objects.filter(id=channel_id, is_active=True).first()
    if not channel:  # pragma: no cover
        return

    with r.lock(key, 3600):
        # look up all whatsapp URNs for this channel
        wa_urns = (
            ContactURN.objects.filter(
                org_id=channel.org_id, scheme=WHATSAPP_SCHEME, contact__is_stopped=False, contact__is_blocked=False
            )
            .exclude(contact=None)
            .only("id", "path")
        )

        # 1,000 contacts at a time, we ask WhatsApp to look up our contacts based on the path
        refreshed = 0

        for urn_batch in chunk_list(wa_urns, 1000):
            # need to wait 10 seconds between each batch of 1000
            if refreshed > 0:  # pragma: no cover
                time.sleep(10)

            # build a list of the fully qualified numbers we have
            contacts = ["+%s" % u.path for u in urn_batch]
            payload = {"blocking": "wait", "contacts": contacts}

            # go fetch our contacts
            headers = {"Authorization": "Bearer %s" % channel.config[Channel.CONFIG_AUTH_TOKEN]}
            resp = requests.post(
                channel.config[Channel.CONFIG_BASE_URL] + "/v1/contacts", json=payload, headers=headers
            )

            # if we had an error, raise
            if resp.status_code != 200:
                raise Exception("Received error refreshing contacts for %d" % channel.id)

            refreshed += len(urn_batch)

        print("refreshed %d whatsapp urns for channel %d" % (refreshed, channel_id))
def populate_recipients_for_broadcast(Broadcast, MsgManager, broadcast_id):
    """
    Populates the recipients for the passed-in broadcast: we select all the
    msgs for this broadcast, then populate the recipients based on the URNs of
    those messages
    """
    urn_ids = MsgManager.filter(broadcast=broadcast_id).values_list("contact_urn_id", flat=True)

    # clear any current recipients, we are rebuilding
    RelatedRecipients = Broadcast.recipients.through
    Broadcast.objects.get(id=broadcast_id).recipients.clear()

    for urn_batch in chunk_list(set(urn_ids), 1000):
        recipient_batch = [RelatedRecipients(contacturn_id=u, broadcast_id=broadcast_id) for u in urn_batch]
        RelatedRecipients.objects.bulk_create(recipient_batch)

    return len(urn_ids)
def populate_recipients_for_broadcast(Broadcast, MsgManager, broadcast_id):
    """
    Populates the recipients for the passed-in broadcast: we select all the
    msgs for this broadcast, then populate the recipients based on the contacts of
    those messages
    """
    contact_ids = MsgManager.filter(broadcast=broadcast_id).values_list('contact_id', flat=True)
    contact_ids = set([c for c in contact_ids if c is not None])

    # clear any current recipients, we are rebuilding
    RelatedRecipients = Broadcast.recipients.through
    Broadcast.objects.get(id=broadcast_id).recipients.clear()

    for contact_id_batch in chunk_list(contact_ids, 1000):
        recipient_batch = [RelatedRecipients(contact_id=c, broadcast_id=broadcast_id) for c in contact_id_batch]
        RelatedRecipients.objects.bulk_create(recipient_batch)

    return len(contact_ids)
Example #42
0
def check_campaigns_task():
    """
    See if any event fires need to be triggered
    """
    from temba.flows.models import Flow

    unfired = EventFire.objects.filter(
        fired=None, scheduled__lte=timezone.now(), event__flow__flow_server_enabled=False
    ).select_related("event")
    unfired = unfired.values("id", "event_id", "event__flow_id")

    # group fire events by event so they can be batched
    fire_ids_by_event_id = defaultdict(list)
    event_flow_map = dict()
    for fire in unfired:
        event_flow_map[fire["event_id"]] = fire["event__flow_id"]
        fire_ids_by_event_id[fire["event_id"]].append(fire["id"])

    # fetch the flows used by all these event fires
    flows_by_id = {flow.id: flow for flow in Flow.objects.filter(id__in=event_flow_map.values())}

    queued_fires = QueueRecord("queued_event_fires")

    # create queued tasks
    for ev_id, fire_ids in fire_ids_by_event_id.items():
        flow_id = event_flow_map[ev_id]
        flow = flows_by_id[flow_id]

        # create sub-batches so no single task is too big
        for fire_id_batch in chunk_list(fire_ids, 500):

            # ignore any fires which were queued by previous calls to this task but haven't yet been marked as fired
            queued_fire_ids = queued_fires.filter_unqueued(fire_id_batch)

            if queued_fire_ids:
                try:
                    push_task(
                        flow.org_id, Queue.HANDLER, HANDLE_EVENT_TASK, dict(type=FIRE_EVENT, fires=queued_fire_ids)
                    )

                    queued_fires.set_queued(queued_fire_ids)
                except Exception:  # pragma: no cover
                    fire_ids_str = ",".join(str(f) for f in queued_fire_ids)
                    logger.error("Error queuing campaign event fires: %s" % fire_ids_str, exc_info=True)
Example #43
0
def migrate_from_calls(apps, schema_editor):
    Call = apps.get_model('msgs', 'Call')
    ChannelEvent = apps.get_model('channels', 'ChannelEvent')
    ContactURN = apps.get_model('contacts', 'ContactURN')

    call_ids = list(Call.objects.values_list('pk', flat=True))
    num_created = 0
    num_without_urn = 0

    urn_prefetch = Prefetch('contact__urns', ContactURN.objects.filter(scheme='tel'))

    for call_id_batch in chunk_list(call_ids, 1000):
        call_batch = list(Call.objects.filter(pk__in=call_id_batch).prefetch_related(urn_prefetch))
        event_batch = []

        for call in call_batch:
            contact_urns = list(call.contact.urns.all())
            call_urn = contact_urns[0] if contact_urns else None

            if not call_urn:
                num_without_urn += 1

            event_batch.append(ChannelEvent(event_type=call.call_type,
                                            time=call.time,
                                            duration=call.duration,
                                            created_on=call.created_on,
                                            is_active=call.is_active,
                                            channel_id=call.channel_id,
                                            contact_id=call.contact_id,
                                            contact_urn=call_urn,
                                            org_id=call.org_id))

        ChannelEvent.objects.bulk_create(event_batch)
        num_created += len(event_batch)

        print(" > Migrated %d of %d calls" % (num_created, len(call_ids)))

    if num_created:
        print("Migrated %d calls to channel events (couldn't find URN for %d)" % (num_created, num_without_urn))
Example #44
0
def exit_active_flowruns(Contact, log=False):
    from temba.flows.models import FlowRun

    exit_runs = []

    # find all contacts that have more than one active run
    active_contact_ids = Contact.objects.filter(runs__is_active=True).order_by('id')\
        .annotate(run_count=Count('id')).filter(run_count__gt=1).values_list('id', flat=True)

    if log:
        print "%d contacts to evaluate runs for" % len(active_contact_ids)

    for idx, contact_id in enumerate(active_contact_ids):
        active_runs = FlowRun.objects.filter(contact_id=contact_id, is_active=True).order_by('-modified_on')

        # more than one? we may need to expire some
        if len(active_runs) > 1:
            last = active_runs[0]
            contact_exit_runs = [r.id for r in active_runs[1:]]
            # keep the ancestors of the most recent run active as well
            ancestor = last.parent
            while ancestor:
                if ancestor.id in contact_exit_runs:
                    contact_exit_runs.remove(ancestor.id)
                ancestor = ancestor.parent

            exit_runs += contact_exit_runs

        if log and idx % 100 == 0:
            print("  - %d / %d contacts evaluated. %d runs to exit" % (idx, len(active_contact_ids), len(exit_runs)))

    # ok, now exit those runs
    exited = 0
    for batch in chunk_list(exit_runs, 1000):
        runs = FlowRun.objects.filter(id__in=batch)
        FlowRun.bulk_exit(runs, FlowRun.EXIT_TYPE_INTERRUPTED, timezone.now())

        exited += len(batch)
        if log:
            print " * %d / %d runs exited." % (exited, len(exit_runs))
Example #45
0
def trim_event_fires_task():
    start = timezone.now()
    boundary = timezone.now() - timedelta(days=settings.EVENT_FIRE_TRIM_DAYS)

    # first look for unfired fires that belong to inactive events
    trim_ids = list(
        EventFire.objects.filter(fired=None, event__is_active=False).values_list("id", flat=True)[:EVENT_FIRES_TO_TRIM]
    )

    # if we have trimmed all of our unfired inactive fires, look for old fired ones
    if len(trim_ids) < EVENT_FIRES_TO_TRIM:
        trim_ids += list(
            EventFire.objects.filter(fired__lt=boundary)
            .values_list("id", flat=True)
            .order_by("fired")[: EVENT_FIRES_TO_TRIM - len(trim_ids)]
        )

    for batch in chunk_list(trim_ids, 100):
        # use a bulk delete for performance reasons, nothing references EventFire
        EventFire.objects.filter(id__in=batch).delete()

    print(f"Deleted {len(trim_ids)} event fires in {timezone.now()-start}")
Example #46
0
def resolve_twitter_ids():
    r = get_redis_connection()
    # TODO: we can't use our non-overlapping task decorator as it creates a loop in the celery resolver when registering
    if r.get("resolve_twitter_ids_task"):  # pragma: no cover
        return

    with r.lock("resolve_twitter_ids_task", 1800):
        # look up all 'twitter' URNs, limiting to 30k since that's the most our API would allow anyways
        twitter_urns = ContactURN.objects.filter(
            scheme=TWITTER_SCHEME, contact__is_stopped=False, contact__is_blocked=False
        ).exclude(contact=None)
        twitter_urns = twitter_urns[:30000].only("id", "org", "contact", "path")
        api_key = settings.TWITTER_API_KEY
        api_secret = settings.TWITTER_API_SECRET
        client = Twython(api_key, api_secret)

        updated = 0
        print("found %d twitter urns to resolve" % len(twitter_urns))

        # contacts we will stop
        stop_contacts = []

        # we try to look these up 100 at a time
        for urn_batch in chunk_list(twitter_urns, 100):
            screen_names = [u.path for u in urn_batch]
            screen_map = {u.path: u for u in urn_batch}

            # try to fetch our users by screen name
            try:
                resp = client.lookup_user(screen_name=",".join(screen_names))

                for twitter_user in resp:
                    screen_name = twitter_user["screen_name"].lower()
                    twitter_id = twitter_user["id"]

                    if screen_name in screen_map and twitter_user["id"]:
                        twitterid_urn = URN.normalize(URN.from_twitterid(twitter_id, screen_name))
                        old_urn = screen_map[screen_name]

                        # create our new contact URN
                        new_urn = ContactURN.get_or_create(old_urn.org, old_urn.contact, twitterid_urn)

                        # if our new URN already existed for another contact and it is newer
                        # than our old contact, reassign it to the old contact
                        if (
                            new_urn.contact != old_urn.contact
                            and new_urn.contact.created_on > old_urn.contact.created_on
                        ):
                            new_urn.contact = old_urn.contact
                            new_urn.save(update_fields=["contact"])

                        # get rid of our old URN
                        ContactURN.objects.filter(id=old_urn.id).update(contact=None)
                        del screen_map[screen_name]
                        updated += 1

            except Exception as e:
                # if this wasn't an exception caused by not finding any of the users, then break
                if str(e).find("No user matches") < 0:
                    # exit, we'll try again later
                    print("exiting resolve_twitter_ids due to exception: %s" % e)
                    break

            # the URNs left in screen_map couldn't be resolved, so their contacts are candidates to stop
            for urn in screen_map.values():
                stop_contacts.append(urn)

        # stop all the contacts we couldn't resolve that have only a twitter URN
        stopped = 0
        for contact_urn in stop_contacts:
            contact = contact_urn.contact
            if len(contact.urns.all()) == 1:
                contact.stop(contact.created_by)
                stopped += 1

        if len(twitter_urns) > 0:
            print("updated %d twitter urns, %d stopped" % (updated, len(stop_contacts)))
Example #47
0
    def create_contacts(self, orgs, locations, num_contacts):
        """
        Creates test and regular contacts for this database. Returns tuples of org, contact id and the preferred urn
        id to avoid trying to hold all contact and URN objects in memory.
        """
        group_counts = defaultdict(int)

        self._log("Creating %d test contacts..." % (len(orgs) * len(USERS)))

        for org in orgs:
            test_contacts = []
            for user in org.cache["users"]:
                test_contacts.append(Contact.get_test_contact(user))
            org.cache["test_contacts"] = test_contacts

        self._log(self.style.SUCCESS("OK") + "\n")
        self._log("Creating %d regular contacts...\n" % num_contacts)

        # disable table triggers to speed up insertion and in the case of contact group m2m, avoid having an unsquashed
        # count row for every contact
        with DisableTriggersOn(Contact, ContactURN, ContactGroup.contacts.through):
            names = [("%s %s" % (c1, c2)).strip() for c2 in CONTACT_NAMES[1] for c1 in CONTACT_NAMES[0]]
            names = [n if n else None for n in names]

            batch_num = 1
            for index_batch in chunk_list(range(num_contacts), self.batch_size):
                batch = []

                # generate flat representations and contact objects for this batch
                for c_index in index_batch:  # pragma: no cover
                    org = self.random_org(orgs)
                    name = self.random_choice(names)
                    location = self.random_choice(locations) if self.probability(CONTACT_HAS_FIELD_PROB) else None
                    created_on = self.timeline_date(c_index / num_contacts)

                    c = {
                        "org": org,
                        "user": org.cache["users"][0],
                        "name": name,
                        "groups": [],
                        "tel": "+2507%08d" % c_index if self.probability(CONTACT_HAS_TEL_PROB) else None,
                        "twitter": "%s%d" % (name.replace(" ", "_").lower() if name else "tweep", c_index)
                        if self.probability(CONTACT_HAS_TWITTER_PROB)
                        else None,
                        "gender": self.random_choice(("M", "F")) if self.probability(CONTACT_HAS_FIELD_PROB) else None,
                        "age": self.random.randint(16, 80) if self.probability(CONTACT_HAS_FIELD_PROB) else None,
                        "joined": self.random_date() if self.probability(CONTACT_HAS_FIELD_PROB) else None,
                        "ward": location[0] if location else None,
                        "district": location[1] if location else None,
                        "state": location[2] if location else None,
                        "language": self.random_choice(CONTACT_LANGS),
                        "is_stopped": self.probability(CONTACT_IS_STOPPED_PROB),
                        "is_blocked": self.probability(CONTACT_IS_BLOCKED_PROB),
                        "is_active": self.probability(1 - CONTACT_IS_DELETED_PROB),
                        "created_on": created_on,
                        "modified_on": self.random_date(created_on, self.db_ends_on),
                    }

                    c["fields_as_json"] = {}

                    if c["gender"] is not None:
                        c["fields_as_json"][str(org.cache["fields"]["gender"].uuid)] = {"text": str(c["gender"])}
                    if c["age"] is not None:
                        c["fields_as_json"][str(org.cache["fields"]["age"].uuid)] = {
                            "text": str(c["age"]),
                            "number": str(c["age"]),
                        }
                    if c["joined"] is not None:
                        c["fields_as_json"][str(org.cache["fields"]["joined"].uuid)] = {
                            "text": org.format_datetime(c["joined"], show_time=False),
                            "datetime": timezone.localtime(c["joined"], org.timezone).isoformat(),
                        }

                    if location:
                        c["fields_as_json"].update(
                            {
                                str(org.cache["fields"]["ward"].uuid): {
                                    "text": str(c["ward"].path.split(" > ")[-1]),
                                    "ward": c["ward"].path,
                                    "district": c["district"].path,
                                    "state": c["state"].path,
                                },
                                str(org.cache["fields"]["district"].uuid): {
                                    "text": str(c["district"].path.split(" > ")[-1]),
                                    "district": c["district"].path,
                                    "state": c["state"].path,
                                },
                                str(org.cache["fields"]["state"].uuid): {
                                    "text": str(c["state"].path.split(" > ")[-1]),
                                    "state": c["state"].path,
                                },
                            }
                        )

                    # work out which system groups this contact belongs to
                    if c["is_active"]:
                        if not c["is_blocked"] and not c["is_stopped"]:
                            c["groups"].append(org.cache["system_groups"][ContactGroup.TYPE_ALL])
                        if c["is_blocked"]:
                            c["groups"].append(org.cache["system_groups"][ContactGroup.TYPE_BLOCKED])
                        if c["is_stopped"]:
                            c["groups"].append(org.cache["system_groups"][ContactGroup.TYPE_STOPPED])

                    # let each user group decide if it is taking this contact
                    for g in org.cache["groups"]:
                        if g.member(c) if callable(g.member) else self.probability(g.member):
                            c["groups"].append(g)

                    # track changes to group counts
                    for g in c["groups"]:
                        group_counts[g] += 1

                    batch.append(c)

                self._create_contact_batch(batch)
                self._log(" > Created batch %d of %d\n" % (batch_num, max(num_contacts // self.batch_size, 1)))
                batch_num += 1

        # create group count records manually
        counts = []
        for group, count in group_counts.items():
            counts.append(ContactGroupCount(group=group, count=count, is_squashed=True))
            group.count = count
        ContactGroupCount.objects.bulk_create(counts)
def add_deps(Flow, ActionSet, RuleSet):
    # constants
    FlowFlowDeps = Flow.flow_dependencies.through
    startFlowActionType = "flow"
    triggerFlowActionType = "trigger-flow"
    rulesetTypeSubflow = "subflow"

    start_time = time.monotonic()
    print("Collecting flows and dependencies...")

    # inactive flows have their deps cleared out, we only check active flows
    valid_flows = Flow.objects.filter(is_active=True).values_list("id", "uuid")

    valid_flow_map = dict()
    flow_ids = list()

    for valid_flow in valid_flows:
        flow_id, flow_uuid = valid_flow

        valid_flow_map[flow_uuid] = flow_id
        flow_ids.append(flow_id)

    total_flows = len(flow_ids)
    processed_flows = 0

    expected_flow_deps = defaultdict(set)

    print("Processing flow dependencies...")
    for flow_ids_chunk in chunk_list(flow_ids, 1000):
        chunk_start_time = time.monotonic()
        actionsets = (
            ActionSet.objects.filter(flow_id__in=flow_ids_chunk)
            .values("flow_id")
            .annotate(actions=ArrayAgg("actions"))
        )

        for actionset in actionsets:
            flow_id = actionset["flow_id"]
            actionset_actions = actionset["actions"]

            for action_list in actionset_actions:
                for action in action_list:
                    # start-flow actions reference another flow as a dependency
                    if action["type"] == startFlowActionType:
                        flow_uuid = action["flow"]["uuid"]

                        # there might be some inactive flows listed as dependencies, ignore
                        if flow_uuid in valid_flow_map:
                            expected_flow_deps[flow_id].add(valid_flow_map[flow_uuid])

                    if action["type"] == triggerFlowActionType:
                        flow_uuid = action["flow"]["uuid"]

                        # there might be some inactive flows listed as dependencies, ignore
                        if flow_uuid in valid_flow_map:
                            expected_flow_deps[flow_id].add(valid_flow_map[flow_uuid])

        rulesets = (
            RuleSet.objects.filter(flow_id__in=flow_ids_chunk, ruleset_type=rulesetTypeSubflow)
            .values("flow_id")
            .annotate(configs=ArrayAgg("config"))
        )

        for ruleset in rulesets:
            flow_id = ruleset["flow_id"]
            ruleset_configs = ruleset["configs"]

            for config in ruleset_configs:

                flow_uuid = config["flow"]["uuid"]

                # there might be some inactive flows listed as dependencies, ignore
                if flow_uuid in valid_flow_map:
                    expected_flow_deps[flow_id].add(valid_flow_map[flow_uuid])

        processed_flows += len(flow_ids_chunk)
        print(f"Processed {processed_flows}/{total_flows} in {time.monotonic() - chunk_start_time}")

    print(f"Collected flows and dependencies in {time.monotonic() - start_time}")

    print("Comparing actual to expected flow dependencies...")
    flow_dep_ids = list(expected_flow_deps.keys())
    total_added_deps = 0

    bulk_deps_to_add = list()

    for from_flow_id in flow_dep_ids:
        actual_flow_dep_results = (
            FlowFlowDeps.objects.filter(from_flow_id=from_flow_id)
            .values("from_flow_id")
            .annotate(deps=ArrayAgg("to_flow_id"))
        )
        actual_flow_dep = next(actual_flow_dep_results.iterator(), None)

        if actual_flow_dep:
            actual_deps = set(actual_flow_dep["deps"])
        else:
            actual_deps = set()

        deps_to_add = expected_flow_deps[from_flow_id].difference(actual_deps)
        total_added_deps += len(deps_to_add)

        for dep in deps_to_add:
            bulk_deps_to_add.append(FlowFlowDeps(from_flow_id=from_flow_id, to_flow_id=dep))

    FlowFlowDeps.objects.bulk_create(bulk_deps_to_add)

    print(f"Total added missing deps: {total_added_deps}")