def recheck_per_alloy(limit: int = None, state: str = None, max_minutes: int = None):
    if max_minutes is None:
        max_minutes = get_feature_int(
            "voter_action_check", "max_recheck_alloy_minutes"
        ) or (settings.VOTER_CHECK_INTERVAL_MINUTES // 2)
    if state:
        update = (
            AlloyDataUpdate.objects.filter(state=state).order_by("-created_at").first()
        )
        # guard against a state that has no updates yet
        updates = [update] if update else []
    else:
        # keep only the most recent update per state
        saw_state = set()
        updates = []
        for update in AlloyDataUpdate.objects.order_by("-created_at"):
            if update.state not in saw_state:
                saw_state.add(update.state)
                updates.append(update)
    for update in updates:
        logger.info(
            f"State {update.state} update for {update.state_update_at} appeared at {update.created_at}"
        )
        checked = _recheck_old_actions(
            limit=limit,
            state=update.state,
            since=update.created_at,
            max_minutes=max_minutes,
        )
        if limit is not None and checked:
            limit -= checked
            if limit <= 0:
                return
def get_watermark():
    last = cache.get(WATERMARK_CACHE_KEY, None)
    if last:
        return datetime.datetime.fromisoformat(last).replace(
            tzinfo=datetime.timezone.utc
        )
    last = Number.objects.aggregate(Max("opt_out_time")).get("opt_out_time__max")
    if last:
        # Add some slop here because the webhook timestamp is our
        # *local receive* time, not what twilio has recorded, so our
        # message timestamps (when delivered via webhook) are somewhat
        # inaccurate. :(
        slop = get_feature_int("smsbot", "poll_slop_seconds") or 0
        return last - datetime.timedelta(seconds=slop)
    return datetime.datetime.now(tz=datetime.timezone.utc) - datetime.timedelta(
        seconds=get_feature_int("smsbot", "poll_max_seconds")
        or settings.SMS_OPTOUT_POLL_MAX_SECONDS
    )
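# A minimal sketch of the companion setter, assuming the poller wants to
# persist its progress after each successful poll. set_watermark() is a
# hypothetical helper, not part of the original code; get_watermark() above
# round-trips the value with datetime.fromisoformat(), so store ISO-8601.
def set_watermark(ts: datetime.datetime) -> None:
    cache.set(WATERMARK_CACHE_KEY, ts.isoformat())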
def send_blank_register_forms(offset=0, limit=None, state=None) -> None:
    from .tasks import send_blank_register_forms_to_lead as send_forms_task

    # post-experiment, moving forward
    states = [
        "AZ", "GA", "FL", "MI", "NC", "PA", "WI", "OH", "MN", "CO",
        "IA", "ME", "NE", "KS", "SC", "AL", "MS", "MT", "UT",
    ]
    if state:
        states = [state]
    q = MoverLead.objects.filter(
        new_state__in=states,
        created_at__gte=datetime.datetime(
            2020, 9, 16, 0, 0, 0, tzinfo=datetime.timezone.utc
        ),
        blank_register_forms_action__isnull=True,
        new_housing_tenure="rent",
    ).exclude(new_state=F("old_state"))
    end = offset + limit if limit else None
    queue_async = get_feature_bool("movers", "blank_forms_async")
    max_minutes = get_feature_int("movers", "blank_forms_max_minutes") or 55
    logger.info(
        f"send_blank_register_forms states {states}, offset={offset}, limit={limit}, "
        f"count={q.count()}, async={queue_async}, max_minutes={max_minutes}"
    )
    for lead in q[offset:end]:
        if queue_async:
            send_forms_task.apply_async(args=(lead.uuid,), expires=(max_minutes * 60))
        else:
            send_blank_register_forms_to_lead(lead)
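# Hedged usage sketch: working through the queryset in slices from a shell or
# management command. The batch size of 500 is an illustrative assumption,
# not a value from the original code.
#
#   send_blank_register_forms(offset=0, limit=500, state="GA")
#   send_blank_register_forms(offset=500, limit=500, state="GA")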
def recheck_old_actions(
    limit: int = None,
    state: str = None,
    min_check_interval: int = None,
    since: datetime.datetime = None,
    max_action_age: int = None,
    max_minutes: int = None,
) -> None:
    if limit is None:
        limit = get_feature_int("voter_action_check", "max_recheck")
    if not limit:
        logger.info("voter_action_check disabled or max_recheck==0")
        return
    recheck_per_alloy(limit=limit, state=state, max_minutes=max_minutes)
    recheck_any(
        limit=limit,
        state=state,
        min_check_interval=min_check_interval,
        max_action_age=max_action_age,
        max_minutes=max_minutes,
    )
def recheck_any(
    limit: int = None,
    state: str = None,
    min_check_interval: int = None,
    max_action_age: int = None,
    max_minutes: int = None,
) -> None:
    if max_minutes is None:
        max_minutes = get_feature_int(
            "voter_action_check", "max_recheck_any_minutes"
        ) or (settings.VOTER_CHECK_INTERVAL_MINUTES // 2)
    _recheck_old_actions(
        limit=limit,
        state=state,
        min_check_interval=min_check_interval,
        max_action_age=max_action_age,
        max_minutes=max_minutes,
    )
def geocode_leads(
    new_state: str = None, old_state: str = None, limit: int = None
) -> None:
    from .tasks import geocode_mover

    q = MoverLead.objects.filter(new_region_id__isnull=True, old_region_id__isnull=True)
    if new_state:
        q = q.filter(new_state=new_state)
    if old_state:
        q = q.filter(old_state=old_state)
    queue_async = get_feature_bool("movers", "geocode_async")
    max_minutes = get_feature_int("movers", "geocode_max_minutes") or 55
    for lead in q[0:limit]:
        if queue_async:
            geocode_mover.apply_async(
                args=(str(lead.uuid),), expires=(max_minutes * 60)
            )
        else:
            geocode_lead(lead)
def push_to_actionnetwork(limit=None, offset=0, new_state=None) -> None:
    from .tasks import push_mover_to_actionnetwork

    session = get_actionnetwork_session(settings.ACTIONNETWORK_KEY)
    form_id = get_or_create_form(session)
    q = MoverLead.objects.filter(
        actionnetwork_person_id__isnull=True,
        source_created_at__gte=datetime.datetime(
            2020, 9, 16, 0, 0, 0, tzinfo=datetime.timezone.utc
        ),
    ).order_by("source_created_at")
    if new_state:
        q = q.filter(new_state=new_state)
    end = offset + limit if limit else None
    queue_async = get_feature_bool("movers", "push_to_actionnetwork_async")
    max_minutes = get_feature_int("movers", "push_to_actionnetwork_max_minutes") or 55
    for lead in q[offset:end]:
        if queue_async:
            push_mover_to_actionnetwork.apply_async(
                args=(lead.pk,), expires=(max_minutes * 60)
            )
        else:
            _push_lead(session, form_id, lead)
            # pace synchronous pushes to respect the ActionNetwork API
            time.sleep(settings.ACTIONNETWORK_SYNC_DELAY)
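# Hedged usage sketch: a one-off backfill for a single destination state,
# relying on the synchronous path's time.sleep() pacing above. The limit
# value is an illustrative assumption.
#
#   push_to_actionnetwork(limit=1000, new_state="PA")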
def send_blank_register_forms_tx(offset=0, limit=None) -> None:
    from .tasks import send_blank_register_forms_to_lead as send_forms_task

    q = MoverLead.objects.filter(
        new_state="TX",
        created_at__gte=datetime.datetime(2020, 9, 16, 0, 0, 0).replace(
            tzinfo=datetime.timezone.utc
        ),
        blank_register_forms_action__isnull=True,
        new_housing_tenure="rent",
    )
    end = None
    if limit:
        end = offset + limit
    queue_async = get_feature_bool("movers", "blank_forms_tx_async")
    max_minutes = get_feature_int("movers", "blank_forms_tx_max_minutes") or 55
    logger.info(
        f"send_blank_register_forms_tx offset={offset}, limit={limit}, count={q.count()}, "
        f"async={queue_async}, max_minutes={max_minutes}"
    )
    for lead in q[offset:end]:
        if queue_async:
            send_forms_task.apply_async(args=(lead.uuid,), expires=(max_minutes * 60))
        else:
            send_blank_register_forms_to_lead(lead)
def _recheck_old_actions(
    limit: int = None,
    state: str = None,
    min_check_interval: int = None,
    since: datetime.datetime = None,
    max_action_age: int = None,
    max_minutes: int = None,
) -> int:
    # start with actions we've previously tried to match
    query = Action.objects.filter(last_voter_lookup__isnull=False)

    # unmatched or unregistered
    query = query.filter(Q(voter_id__isnull=True) | Q(voter__registered=False))

    if not since:
        # don't recheck things we recently checked
        if min_check_interval is None:
            min_check_interval = (
                get_feature_int("voter_action_check", "recheck_internal_days")
                or settings.VOTER_RECHECK_INTERVAL_DAYS
            )
        since = datetime.datetime.now(tz=datetime.timezone.utc) - datetime.timedelta(
            days=min_check_interval
        )
    query = query.filter(last_voter_lookup__lt=since)

    # ignore very old actions
    if max_action_age is None:
        max_action_age = (
            get_feature_int("voter_action_check", "recheck_max_days")
            or settings.VOTER_RECHECK_MAX_DAYS
        )
    action_cutoff = datetime.datetime.now(
        tz=datetime.timezone.utc
    ) - datetime.timedelta(days=max_action_age)
    query = query.filter(created_at__gt=action_cutoff)

    if state:
        query = query.filter(
            Q(lookup__state_id=state)
            | Q(registration__state_id=state)
            | Q(ballotrequest__state_id=state)
            | Q(reminderrequest__state_id=state)
        )
    else:
        # make sure it matches an item that we actually check
        query = query.filter(
            Q(lookup__isnull=False)
            | Q(registration__isnull=False)
            | Q(ballotrequest__isnull=False)
            | Q(reminderrequest__isnull=False)
        )

    if not max_minutes:
        max_minutes = settings.VOTER_CHECK_INTERVAL_MINUTES
    if limit and state == "WI":
        # adjust limit for queue rate
        limit = int(
            limit
            * settings.BULK_QUEUE_RATE_LIMITS["voter"]
            / settings.BULK_QUEUE_RATE_LIMITS["voter-wi"]
        )
    queue_async = get_feature_bool("voter_action_check", "recheck_async")
    if not queue_async:
        stop = datetime.datetime.utcnow() + datetime.timedelta(minutes=max_minutes)

    count = query.count()
    if not count:
        return 0
    logger.info(
        f"Recheck old actions limit {limit} of {count} in {state} since {since} "
        f"(min_check_interval {min_check_interval}, max_action_age {max_action_age}, "
        f"max_minutes {max_minutes})"
    )
    query = query.order_by("created_at")
    if limit:
        query = query[:limit]
    checked = 0
    for action in query:
        if queue_async:
            if state == "WI":
                voter_lookup_action.apply_async(
                    args=(str(action.uuid),),
                    expires=(max_minutes * 60),
                    queue="voter-wi",
                )
            else:
                voter_lookup_action.apply_async(
                    args=(str(action.uuid),), expires=(max_minutes * 60)
                )
            checked += 1
        else:
            item = action.get_source_item()
            if item:
                lookup(item)
                checked += 1
            if stop < datetime.datetime.utcnow():
                logger.info(f"Hit max runtime ({max_minutes} minutes)")
                break
    return checked
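# A hedged sketch of the task queued above, mirroring the synchronous
# fallback path and assuming the usual shared_task pattern. The real
# voter_lookup_action is imported from elsewhere and may differ, so this is
# commented out to avoid shadowing it:
#
# @shared_task(queue="voter")
# def voter_lookup_action(action_uuid: str) -> None:
#     action = Action.objects.get(uuid=action_uuid)
#     item = action.get_source_item()
#     if item:
#         lookup(item)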
def check_new_actions(
    limit: int = None, state: str = None, max_minutes: int = None
) -> None:
    if limit is None:
        limit = get_feature_int("voter_action_check", "max_check")
    if not limit:
        logger.info("voter_action_check disabled or max_check==0")
        return
    if max_minutes is None:
        max_minutes = get_feature_int("voter_action_check", "max_check_minutes") or (
            settings.VOTER_CHECK_INTERVAL_MINUTES // 2
        )
    cutoff = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc
    ) - datetime.timedelta(seconds=settings.ACTION_CHECK_UNFINISHED_DELAY)
    query = Action.objects.filter(last_voter_lookup=None, created_at__lt=cutoff)
    if state:
        query = query.filter(
            Q(lookup__state_id=state)
            | Q(registration__state_id=state)
            | Q(ballotrequest__state_id=state)
            | Q(reminderrequest__state_id=state)
        )
    else:
        # make sure it matches an item that we actually check
        query = query.filter(
            Q(lookup__isnull=False)
            | Q(registration__isnull=False)
            | Q(ballotrequest__isnull=False)
            | Q(reminderrequest__isnull=False)
        )
    logger.info(
        f"check_new_actions limit {limit} of {query.count()}, state {state}, "
        f"max_minutes {max_minutes}"
    )
    if not max_minutes:
        max_minutes = settings.VOTER_CHECK_INTERVAL_MINUTES
    queue_async = get_feature_bool("voter_action_check", "check_async")
    if not queue_async:
        stop = datetime.datetime.utcnow() + datetime.timedelta(minutes=max_minutes)
    query = query.order_by("created_at")
    if limit:
        query = query[:limit]
    for action in query:
        if queue_async:
            if state == "WI":
                voter_lookup_action.apply_async(
                    args=(str(action.uuid),),
                    expires=(max_minutes * 60),
                    queue="voter-wi",
                )
            else:
                voter_lookup_action.apply_async(
                    args=(str(action.uuid),), expires=(max_minutes * 60)
                )
        else:
            item = action.get_source_item()
            if item:
                lookup(item)
            if stop < datetime.datetime.utcnow():
                logger.info(f"Hit max runtime ({max_minutes} minutes)")
                break
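# Hedged wiring sketch: how these checks might be driven periodically with
# Celery beat. The schedule keys and task paths are illustrative assumptions;
# only VOTER_CHECK_INTERVAL_MINUTES comes from the code above.
#
# CELERY_BEAT_SCHEDULE = {
#     "check-new-actions": {
#         "task": "voter.tasks.check_new_actions",  # hypothetical task path
#         "schedule": datetime.timedelta(minutes=settings.VOTER_CHECK_INTERVAL_MINUTES),
#     },
#     "recheck-old-actions": {
#         "task": "voter.tasks.recheck_old_actions",  # hypothetical task path
#         "schedule": datetime.timedelta(minutes=settings.VOTER_CHECK_INTERVAL_MINUTES),
#     },
# }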
def handle_incoming(
    from_number: str,
    sid: str,
    date_created: datetime.datetime,
    body: str,
) -> Tuple[Number, str]:
    n, created = Number.objects.get_or_create(phone=from_number)
    if created:
        logger.info(f"New {n}")
    if SMSMessage.objects.filter(phone=n, twilio_sid=sid).exists():
        logger.debug(f"Ignoring duplicate {n} at {date_created} {sid}: {body}")
        return n, None
    logger.debug(f"Handling {n} at {date_created} {sid}: {body}")
    SMSMessage.objects.create(
        phone=n,
        direction=MessageDirectionType.IN,
        message=body,
        twilio_sid=sid,
    )
    cmd = body.strip().upper()
    reply = None
    if cmd in ["STOP", "STOPALL", "UNSUBSCRIBE", "CANCEL", "END", "QUIT"]:
        logger.info(f"Opt-out from {n} at {date_created}")
        n.opt_out_time = date_created
        n.opt_in_time = None
        n.save()
        # reply IFF optimizely has a value
        reply = get_feature_str("smsbot", "stop")
    elif cmd == "JOIN":
        logger.info(f"Opt-in from {n} at {date_created}")
        n.opt_in_time = date_created
        n.opt_out_time = None
        n.save()
        # Try to match this to an ActionNetwork subscriber. Note that
        # this may fail if the number has been used more than once.
        if resubscribe_phone(str(n.phone)):
            reply = get_feature_str("smsbot", "join_success")
        else:
            reply = get_feature_str("smsbot", "join_fail")
    elif cmd in ["HELP", "INFO"]:
        # reply IFF optimizely has a value for this
        reply = get_feature_str("smsbot", "help")
    elif cmd in ["PPMAP", "EVMAP"] or cmd.startswith("PPMAP ") or cmd.startswith("EVMAP "):
        from .blast import send_map_mms

        # is this a known voter?
        map_type = cmd[0:2].lower()
        rest = cmd.split(" ")[1:]
        content = """{locator_url}

If you are registered to vote at {home_address_short} then your WHAT is:

{dest_name}
{dest_address}
{dest_hours}

If that is not your address, find your polling place at: https://my.voteamerica.com/vote

Reply HELPLINE with any questions about voting, or VOTED if you have already voted.
""".replace(
            "WHAT",
            "polling place" if map_type == "pp" else "early voting location",
        )
        if rest:
            reply = send_map_mms(n, map_type, " ".join(rest), content=content)
        else:
            voter = Voter.objects.filter(phone=from_number).first()
            if voter:
                reply = send_map_mms(
                    n,
                    map_type,
                    f"{voter.address_full}, {voter.city}, {voter.state_id} {voter.zipcode}",
                    content=content,
                )
            else:
                reply = f"I don't have a voter record for {from_number}"
    else:
        if n.opt_out_time:
            reply = get_feature_str("smsbot", "autoreply_opted_out")
        else:
            reply = get_feature_str("smsbot", "autoreply")
    if reply:
        # throttle: check the last outbound message
        try:
            last = (
                SMSMessage.objects.filter(phone=n, direction=MessageDirectionType.OUT)
                .exclude(twilio_sid=sid)
                .latest("created_at")
            )
        except SMSMessage.DoesNotExist:
            last = None
        throttle_minutes = (
            get_feature_int("smsbot", "autoreply_throttle_minutes")
            or settings.SMS_AUTOREPLY_THROTTLE_MINUTES
        )
        if (
            last
            and last.message == reply
            and last.created_at
            >= datetime.datetime.now(tz=datetime.timezone.utc)
            - datetime.timedelta(minutes=throttle_minutes)
        ):
            logger.info(
                f"Ignoring {n} at {date_created} (last autoreply at {last.created_at}): {body}"
            )
            reply = None
        else:
            logger.info(f"Auto-reply to {n} at {date_created}: {body}")
    return n, reply
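# Hedged usage sketch: a Django view wiring Twilio's inbound-SMS webhook into
# handle_incoming(). "From", "MessageSid", and "Body" are standard Twilio
# webhook POST fields; the view name and the bare TwiML reply are illustrative
# assumptions, not part of the original module. Twilio's webhook carries no
# authoritative creation timestamp, so local receive time is used (the "slop"
# caveat noted in get_watermark()).
from django.http import HttpResponse
from django.utils import timezone


def twilio_webhook(request):
    number, reply = handle_incoming(
        from_number=request.POST["From"],
        sid=request.POST["MessageSid"],
        date_created=timezone.now(),  # local receive time
        body=request.POST.get("Body", ""),
    )
    if reply:
        # a production handler should XML-escape the reply text
        return HttpResponse(
            f"<Response><Message>{reply}</Message></Response>",
            content_type="text/xml",
        )
    return HttpResponse("<Response/>", content_type="text/xml")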
def check_group(slug, down_sites=False):
    if down_sites:
        logger.info(f"Checking group {slug} (down sites only)")
    else:
        logger.info(f"Checking group {slug}")
    query = StateInformation.objects.filter(field_type__slug=slug)
    items = list(query)
    random.shuffle(items)
    while True:
        try:
            drivers = get_drivers()
        except WebDriverException as e:
            logger.warning(f"Failed to start selenium worker: {e}")
            sentry_sdk.capture_exception(
                SeleniumError("Failed to start selenium worker")
            )
            return
        if not drivers:
            logger.error("No active proxies")
            sentry_sdk.capture_exception(NoProxyError("No active proxies"))
            return
        try:
            now = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
            while items:
                item = items.pop()

                # some values are blank
                if not item.text:
                    continue
                if get_feature_bool("leouptime", item.state_id) is False:
                    continue
                desc = f"{item.state_id} {MONITORS[slug]}"
                site, _ = Site.objects.get_or_create(
                    description=desc,
                    url=item.text,
                )
                if down_sites and site.state_up:
                    continue
                interval = get_feature_int("leouptime", f"{item.state_id}_interval")
                if interval:
                    last_check = site.sitecheck_set.order_by("-created_at").first()
                    if (
                        last_check
                        and last_check.created_at + datetime.timedelta(minutes=interval)
                        > now
                    ):
                        logger.info(
                            f"SKIPPING: {site.description} {site.url} "
                            f"({item.state_id}_interval={interval})"
                        )
                        continue
                check_site(drivers, site)
        except StaleProxyError:
            logger.info("Refreshing proxies...")
            for driver, proxy in drivers:
                try:
                    driver.quit()
                except WebDriverException as e:
                    logger.warning(f"Failed to quit selenium worker for {proxy}: {e}")
            continue
        break
    for driver, proxy in drivers:
        try:
            driver.quit()
        except WebDriverException as e:
            logger.warning(f"Failed to quit selenium worker for {proxy}: {e}")
def check_proxies():
    stray = get_proxies_by_name()
    up = 0
    creating_cutoff = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc
    ) - datetime.timedelta(minutes=10)
    bad_proxies = []
    for proxy in Proxy.objects.all():
        if proxy.description in stray:
            logger.info(f"Have proxy {proxy}")
            if proxy.state == enums.ProxyStatus.BURNED:
                # we should delete this
                proxy.state = enums.ProxyStatus.DOWN
                proxy.save()
            elif proxy.state == enums.ProxyStatus.DOWN:
                # we should delete this
                pass
            elif (
                proxy.state == enums.ProxyStatus.CREATING
                and proxy.modified_at < creating_cutoff
            ):
                # delete
                logger.info(f"Proxy {proxy} has been CREATING for too long")
                proxy.delete()
            else:
                # not stray
                if proxy.state == enums.ProxyStatus.UP:
                    up += 1

                    # get a selenium driver
                    tries = 0
                    while True:
                        try:
                            driver = get_driver(proxy)
                            break
                        except WebDriverException as e:
                            logger.warning(
                                f"Failed to start selenium worker, tries {tries}: {e}"
                            )
                            tries += 1
                            if tries > 2:
                                logger.warning(
                                    f"Failed to start selenium worker; we'll check proxies later: {e}"
                                )
                                # just bail out here completely; we'll check
                                # our proxies again in a bit
                                return

                    # make sure the proxy is actually working
                    site = get_sentinel_site()
                    check = check_site_with(driver, proxy, site)
                    if not check.state_up:
                        logger.info(f"Proxy {proxy} can't reach sentinel site {site}")
                        bad_proxies.append((proxy, check))
                    try:
                        driver.quit()
                    except WebDriverException as e:
                        logger.warning(
                            f"Failed to quit selenium worker for {proxy}: {e}"
                        )
            del stray[proxy.description]
        else:
            if proxy.state != enums.ProxyStatus.DOWN:
                logger.info(f"No droplet for proxy {proxy}")
                proxy.state = enums.ProxyStatus.DOWN
                proxy.save()
    for name, info in stray.items():
        if not name.startswith(PROXY_PREFIX):
            continue
        logger.info(f"Removing stray droplet {info['id']}")
        remove_proxy(info["id"])
    if bad_proxies:
        if len(bad_proxies) == up:
            logger.warning(
                "All proxies appear to be failing--the sentinel is probably down?"
            )
        else:
            for proxy, check in bad_proxies:
                logger.warning(
                    f"Marking proxy {proxy} BURNED (due to sentinel check failure)"
                )
                check.ignore = True
                check.save()
                proxy.state = enums.ProxyStatus.BURNED
                proxy.save()
                up -= 1
    proxy_count = get_feature_int("leouptime", "proxy_count") or settings.PROXY_COUNT
    logger.info(f"proxy_count {proxy_count}")

    # sentence oldest proxy to death?
    max_age = get_feature_int("leouptime", "proxy_max_age_hours")
    if max_age and up > proxy_count // 2:
        proxy = (
            Proxy.objects.filter(state=enums.ProxyStatus.UP)
            .order_by("created_at")
            .first()
        )
        if proxy and proxy.created_at < datetime.datetime.utcnow().replace(
            tzinfo=datetime.timezone.utc
        ) - datetime.timedelta(hours=max_age):
            logger.info(
                f"Burning oldest proxy {proxy} (created_at {proxy.created_at}, max_age {max_age})"
            )
            proxy.state = enums.ProxyStatus.BURNED
            proxy.save()
            up -= 1
        else:
            logger.info(
                f"Keeping oldest proxy {proxy} (created_at {proxy.created_at}, max_age {max_age})"
            )
    while up < proxy_count:
        create_proxy(random.choice(REGIONS))
        up += 1
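# For reference, a hedged summary of the feature flags consulted above; the
# example values are illustrative assumptions, not the real configuration:
#
#   leouptime.proxy_count          -> target pool size (falls back to
#                                     settings.PROXY_COUNT), e.g. 6
#   leouptime.proxy_max_age_hours  -> rotate the oldest UP proxy once the
#                                     pool is above half strength, e.g. 24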