def remove_blocked_domain_invited_users_from_hubspot(stdout=None):
    """
    Removes contacts from Hubspot who have ever accepted an invitation on HQ
    from a domain that is blocking Hubspot data.

    :param stdout: the stdout of a management command (if applicable)
    """
    blocked_user_emails = emails_that_accepted_invitations_to_blocked_hubspot_domains()
    total_users = blocked_user_emails.count()
    chunk_size = 30  # Hubspot recommends fewer than 100 emails per request
    # math.ceil on an int ratio is exact in Python 3; the previous
    # int(math.ceil(float(a) / float(b))) was a Python 2 idiom that relied
    # on float precision.
    num_chunks = math.ceil(total_users / chunk_size)
    if stdout:
        stdout.write("\n\nChecking Invited Users...")
    for chunk in range(num_chunks):
        start = chunk * chunk_size
        end = (chunk + 1) * chunk_size
        emails_to_check = blocked_user_emails[start:end]
        ids_to_delete = _get_contact_ids_to_delete(set(emails_to_check))
        if stdout:
            stdout.write(f"Found {len(ids_to_delete)} id(s) to delete.")
        # _delete_hubspot_contact presumably returns 1/0 (or True/False)
        # per contact, so summing yields the number deleted.
        num_deleted = sum(_delete_hubspot_contact(vid) for vid in ids_to_delete)
        metrics_counter(
            'commcare.hubspot_data.deleted_user.blocked_domain_invitation',
            num_deleted
        )
def _record_datadog_metrics(self, changes_chunk, processing_time):
    """Record change-feed throughput and lag metrics for one chunk of changes.

    :param changes_chunk: non-empty sequence of changes just processed
    :param processing_time: time spent processing the whole chunk
    """
    tags = {"pillow_name": self.get_name(), "mode": "chunked"}
    change_count = len(changes_chunk)
    if settings.ENTERPRISE_MODE:
        # Break the change count down per case type so enterprise
        # environments can see which case types dominate the feed.
        type_counter = Counter([
            change.metadata.document_subtype
            for change in changes_chunk
            if change.metadata.document_type == 'CommCareCase'
        ])
        for case_type, type_count in type_counter.items():
            metrics_counter('commcare.change_feed.changes.count', type_count,
                            tags={**tags, 'case_type': case_type})
        # Changes that are not CommCareCase docs are counted without a
        # case_type tag.
        remainder = change_count - sum(type_counter.values())
        if remainder:
            metrics_counter('commcare.change_feed.changes.count', remainder,
                            tags=tags)
    else:
        metrics_counter('commcare.change_feed.changes.count', change_count,
                        tags=tags)

    # First element is used for max lag and last for min lag; assumes the
    # chunk is ordered oldest-first by publish_timestamp -- TODO confirm.
    max_change_lag = (datetime.utcnow() - changes_chunk[0].metadata.publish_timestamp).total_seconds()
    min_change_lag = (datetime.utcnow() - changes_chunk[-1].metadata.publish_timestamp).total_seconds()
    metrics_gauge('commcare.change_feed.chunked.min_change_lag', min_change_lag, tags=tags)
    metrics_gauge('commcare.change_feed.chunked.max_change_lag', max_change_lag, tags=tags)

    # processing_time per change
    metrics_counter('commcare.change_feed.processing_time.total',
                    processing_time / change_count, tags=tags)
    metrics_counter('commcare.change_feed.processing_time.count', tags=tags)
def _record_batch_exception_in_datadog(self, processor):
    """Count one batch-processor exception, tagged by pillow and processor."""
    if processor:
        processor_name = processor.__class__.__name__
    else:
        processor_name = "all_processors"
    tags = {
        'pillow_name': self.get_name(),
        'processor': processor_name,
    }
    metrics_counter("commcare.change_feed.batch_processor_exceptions", tags=tags)
def create_repeat_records(repeater_cls, payload):
    """Create repeat records for ``payload``, retrying on any failure.

    :param repeater_cls: Repeater subclass to create records for
    :param payload: payload document; must expose ``domain``
    """
    # As a temporary fix for https://dimagi-dev.atlassian.net/browse/SUPPORT-12244
    # Make a serious attempt to retry creating the repeat record
    # The real fix is to figure out why the form reprocessing system
    # isn't resulting in the signal getting re-fired and the repeat record getting created.
    # Backoff schedule doubles each attempt; under unit tests retry
    # immediately (and only twice).
    for sleep_length in [.5, 1, 2, 4, 8] if not settings.UNIT_TESTING else [0, 0]:
        try:
            _create_repeat_records(repeater_cls, payload)
        except Exception:
            notify_exception(
                None,
                "create_repeat_records had an error resulting in a retry")
            metrics_counter('commcare.repeaters.error_creating_record', tags={
                'domain': payload.domain,
                'repeater_type': repeater_cls.__name__,
            })
            time.sleep(sleep_length)
        else:
            # Success: stop retrying.
            return
    # Every attempt failed; record the permanent failure.
    metrics_counter('commcare.repeaters.failed_to_create_record', tags={
        'domain': payload.domain,
        'repeater_type': repeater_cls.__name__,
    })
def degraded(self):
    """Indicate that the lock has "degraded gracefully"

    The lock was not acquired, but processing continued as if it had
    been acquired.
    """
    # Emit a counter so degraded-lock occurrences are visible in metrics.
    metrics_counter("commcare.lock.degraded", tags=self.tags)
def __del__(self):
    """Report locks that are garbage-collected while apparently still held."""
    unreleased = self.track_unreleased and self.lock_timer.is_started()
    if unreleased:
        metrics_counter("commcare.lock.not_released", tags=self.tags)
    trace = self.lock_trace
    if trace is not None:
        trace.set_tag("deleted", "not_released")
        trace.finish()
        self.lock_trace = None
def check_repeaters():
    """Iterate waiting repeat records and attempt to forward each one.

    Guarded by a Redis lock so only one run happens at a time; bails out
    (recording a metric) if another run holds the lock, and gives up
    iterating after six hours so the run cannot outlive its lock.
    """
    start = datetime.utcnow()
    six_hours_sec = 6 * 60 * 60
    six_hours_later = start + timedelta(seconds=six_hours_sec)

    # Long timeout to allow all waiting repeat records to be iterated
    check_repeater_lock = get_redis_lock(
        CHECK_REPEATERS_KEY,
        timeout=six_hours_sec,
        name=CHECK_REPEATERS_KEY,
    )
    if not check_repeater_lock.acquire(blocking=False):
        metrics_counter("commcare.repeaters.check.locked_out")
        return

    try:
        with datadog_bucket_timer(
            "commcare.repeaters.check.processing",
            tags=[],
            timing_buckets=_check_repeaters_buckets,
        ):
            for record in iterate_repeat_records(start):
                # Stop before the lock's timeout expires.
                if datetime.utcnow() > six_hours_later:
                    _soft_assert(
                        False,
                        "I've been iterating repeat records for six hours. I quit!"
                    )
                    break
                metrics_counter("commcare.repeaters.check.attempt_forward")
                record.attempt_forward_now()
    finally:
        check_repeater_lock.release()
def _process_raw_messages(self, sender):
    """
    This processes the messages sent directly from known bounce and
    complaints daemons.

    :param sender: a known daemon email (string)
    """
    self.mail.select('inbox')
    for uid, message in self._get_messages(
            f'(Header Delivered-To "{settings.RETURN_PATH_EMAIL}" '
            f'From "{sender}")'):
        if message.get('X-Autoreply') or message.get('Auto-Submitted'):
            # Auto-replies (e.g. out-of-office) are handled as transient
            # bounces rather than permanent ones.
            self._handle_raw_transient_bounces(uid)
        else:
            recipients = self._get_raw_bounce_recipients(message)
            if recipients:
                self._handle_raw_bounced_recipients(recipients, uid)
            else:
                # Could not determine who bounced: count it, label the
                # message for manual review, and notify.
                metrics_counter(
                    'commcare.bounced_email_manager.recipient_unknown')
                self._label_problem_email(
                    uid,
                    extra_labels=["RecipientUnknown"])
                _bounced_email_soft_assert(
                    False,
                    f'[{settings.SERVER_ENVIRONMENT}] '
                    f'Could not find a bounced email recipients. '
                    f'Check inbox for "RecipientUnknown" tag')
def set_cached_payload_if_necessary(self, fileobj, duration, is_async):
    """Persist the restore payload to the cache when warranted.

    Caches when the restore ran asynchronously, when caching is forced,
    or when generating the payload exceeded the time threshold. Returns
    the CachedResponse on a cache write, otherwise None.
    """
    # must cache if the duration was longer than the threshold
    is_long_restore = duration > timedelta(seconds=INITIAL_SYNC_CACHE_THRESHOLD)
    if not (is_async or self.force_cache or is_long_restore):
        return None

    if is_async:
        cache_reason = 'async'
    elif self.force_cache:
        cache_reason = 'force'
    elif is_long_restore:
        cache_reason = 'long'
    else:
        cache_reason = 'unknown'
    metrics_counter('commcare.restores.cache_writes',
                    tags={'type': cache_reason})

    response = CachedResponse.save_for_later(
        fileobj,
        self.cache_timeout,
        self.domain,
        self.restore_user.user_id,
    )
    self.restore_payload_path_cache.set_value(response.name, self.cache_timeout)
    return response
def _rate_limit_restore_test(domain):
    """Dry-run rate limiting: report usage and count would-be limited restores."""
    allowed = restore_rate_limiter.allow_usage(domain)
    if not allowed:
        limited_tags = {'domain': domain}
        metrics_counter('commcare.restore.rate_limited.test', tags=limited_tags)
    restore_rate_limiter.report_usage(domain)
def clean(self):
    """Validate login credentials, CAPTCHA (when enabled), and lockout state.

    :raises ValidationError: on missing/invalid credentials, missing
        CAPTCHA, or when the account is locked out.
    """
    username = self.cleaned_data.get('username')
    if username is None:
        raise ValidationError(_('Please enter a valid email address.'))

    password = self.cleaned_data.get('password')
    if not password:
        raise ValidationError(_("Please enter a password."))

    if settings.ADD_CAPTCHA_FIELD_TO_FORMS:
        if not self.cleaned_data.get('captcha'):
            raise ValidationError(_("Please enter valid CAPTCHA"))

    try:
        cleaned_data = super(EmailAuthenticationForm, self).clean()
    except ValidationError:
        # Authentication failed: if the account is locked out, surface
        # the lockout message instead of the generic auth error.
        user = CouchUser.get_by_username(username)
        if user and user.is_locked_out():
            metrics_counter('commcare.auth.lockouts')
            raise ValidationError(LOCKOUT_MESSAGE)
        else:
            raise

    # Lockout is checked again even when authentication succeeded, so a
    # locked-out account cannot log in with correct credentials.
    user = CouchUser.get_by_username(username)
    if user and user.is_locked_out():
        metrics_counter('commcare.auth.lockouts')
        raise ValidationError(LOCKOUT_MESSAGE)

    return cleaned_data
def _report_import_timings(self, timer, results):
    """Emit a per-case duration histogram and per-status row counters."""
    active_duration = timer.duration - self._total_delayed_duration
    created = results['created_count']
    updated = results['match_count']
    failed = results['failed_count']

    # Add 1 to smooth / prevent denominator from ever being zero
    per_case_duration = active_duration / (created + updated + failed + 1)
    metrics_histogram(
        'commcare.case_importer.duration_per_case', per_case_duration,
        buckets=[50, 70, 100, 150, 250, 350, 500],
        bucket_tag='duration',
        bucket_unit='ms',
    )

    row_counts_by_status = {
        'created': created,
        'updated': updated,
        'error': failed,
    }
    for status, row_count in row_counts_by_status.items():
        metrics_counter('commcare.case_importer.cases', row_count,
                        tags={'status': status})
def expire(self, parent_id, key, minutes=60):
    """Set blob expiration to some minutes from now

    This makes it easy to handle the scenario where a new blob is
    replacing another (now obsolete) blob, but immediate deletion of
    the obsolete blob would introduce a race condition because in-
    flight code may retain references to it. This will schedule the
    obsolete blob for deletion in the near future at which point such
    a race condition is extremely unlikely to be triggered.

    :param parent_id: Parent identifier used for sharding.
    :param key: Blob key.
    :param minutes: Optional number of minutes from now that the blob
    will be set to expire. The default is 60.
    """
    try:
        meta = self.get(parent_id=parent_id, key=key)
    except BlobMeta.DoesNotExist:
        # Nothing to expire.
        return
    if meta.expires_on is None:
        # First time this blob is scheduled to expire: count it as a new
        # temporary blob. An already-expiring blob just gets its
        # expiration reset below, without double-counting.
        tags = _meta_tags(meta)
        metrics_counter('commcare.temp_blobs.count', tags=tags)
        metrics_counter('commcare.temp_blobs.bytes_added',
                        value=meta.stored_content_length, tags=tags)
    meta.expires_on = _utcnow() + timedelta(minutes=minutes)
    meta.save()
def _handle_raw_bounced_recipients(self, recipients, uid):
    """Record bounced recipient addresses parsed from a raw bounce message.

    :param recipients: list of recipient email strings (possibly in
        ``<angle-bracket>`` form)
    :param uid: IMAP uid of the message being processed
    """
    for recipient in recipients:
        if recipient.startswith('<'):
            # clean
            recipient = recipient.replace('<', '').replace('>', '')
        bounce_exists = BouncedEmail.objects.filter(
            email=recipient,
        ).exists()
        transient_exists = TransientBounceEmail.objects.filter(
            email=recipient).exists()
        if (not (bounce_exists or transient_exists)
                and re.search(EMAIL_REGEX_VALIDATION, recipient)):
            # an email will only show up here if there was no prior
            # SNS notification for it. add the email to the bounce list and
            # mark the email in the bounces inbox for further investigation
            BouncedEmail.objects.update_or_create(email=recipient, )
            metrics_counter(
                'commcare.bounced_email_manager.sns_notification_missing')
            self._label_problem_email(
                uid,
                extra_labels=["SNSNotificationMissing"])
            _bounced_email_soft_assert(
                False,
                f'[{settings.SERVER_ENVIRONMENT}] '
                f'An email bounced that was not caught by SNS: {recipient}'
            )
        elif not (bounce_exists or transient_exists):
            # this email failed to validate, find out why
            metrics_counter(
                'commcare.bounced_email_manager.validation_failed')
            self._label_problem_email(uid, extra_labels=["ValidationFailed"])
            _bounced_email_soft_assert(
                False,
                f'[{settings.SERVER_ENVIRONMENT}] '
                f'Tried to mark "{recipient}" as BOUNCED and failed '
                f'because it was not a valid email.')
        elif self.delete_processed_messages:
            # NOTE(review): the message is only deleted when this recipient
            # was already recorded (bounce or transient exists), and the
            # delete runs per-recipient inside the loop -- confirm this is
            # intentional rather than meant to run once after the loop.
            self._delete_message_with_uid(uid)
def _record_metrics(tags, submission_type, response, timer=None, xform=None):
    """Record metrics for one form submission.

    Mutates ``tags`` in place, adding ``submission_type`` and
    ``status_code``, then emits optional lag/duration histograms and the
    submission counter.
    """
    tags.update({
        'submission_type': submission_type,
        'status_code': response.status_code
    })

    has_lag_info = bool(
        xform and xform.metadata and xform.metadata.timeEnd
        and xform.received_on)
    if has_lag_info:
        lag = xform.received_on - xform.metadata.timeEnd
        lag_days = lag.total_seconds() / 86400
        metrics_histogram(
            'commcare.xform_submissions.lag.days', lag_days,
            bucket_tag='lag',
            buckets=(1, 2, 4, 7, 14, 31, 90),
            bucket_unit='d',
            tags=tags)

    if timer:
        metrics_histogram(
            'commcare.xform_submissions.duration.seconds', timer.duration,
            bucket_tag='duration',
            buckets=(1, 5, 20, 60, 120, 300, 600),
            bucket_unit='s',
            tags=tags)

    metrics_counter('commcare.xform_submissions.count', tags=tags)
def process_aws_notifications(self):
    """Process "AWS Notification Message" emails from the return-path inbox.

    Each message may yield several notifications; bounces are routed by
    bounce type, complaints are recorded, and anything unparseable is
    counted and labeled for manual review.
    """
    self.mail.select('inbox')
    for uid, message in self._get_messages(
            f'(Header Delivered-To "{settings.RETURN_PATH_EMAIL}" '
            f'Subject "AWS Notification Message")'):
        try:
            aws_info = self._get_aws_info(message, uid)
            for aws_meta in aws_info:
                if aws_meta.notification_type == NotificationType.BOUNCE:
                    if aws_meta.main_type == BounceType.PERMANENT:
                        self._record_permanent_bounce(aws_meta, uid)
                    elif aws_meta.main_type == BounceType.TRANSIENT:
                        self._record_transient_bounce(aws_meta, uid)
                    elif aws_meta.main_type == BounceType.UNDETERMINED:
                        self._handle_undetermined_bounce(aws_meta, uid)
                    else:
                        # Unknown bounce type: count, label, and notify.
                        metrics_counter(
                            'commcare.bounced_email_manager.unexpected_bounce_type'
                        )
                        self._label_problem_email(
                            uid,
                            extra_labels=["UnexpectedBounceType"])
                        _bounced_email_soft_assert(
                            False,
                            f'[{settings.SERVER_ENVIRONMENT}] '
                            f'Unexpected bounce type received. Check inbox.'
                        )
                elif aws_meta.notification_type == NotificationType.COMPLAINT:
                    self._record_complaint(aws_meta, uid)
        except Exception as e:
            # Deliberately broad: one malformed message must not stop the
            # inbox sweep; the failure is counted and labeled for review.
            metrics_counter(
                'commcare.bounced_email_manager.formatting_issues')
            self._label_problem_email(uid, extra_labels=["FormattingIssues"])
            _bounced_email_soft_assert(
                False,
                f'[{settings.SERVER_ENVIRONMENT}] '
                f'Issue processing AWS Notification Message: {e}')
def delete_sms_events(start_date, end_date):
    """Bulk-delete messaging events and subevents in a date range via raw SQL.

    SMS rows themselves are preserved: their ``messaging_subevent_id`` FK is
    nulled out first so the subevent rows can be deleted. All three
    statements run in one transaction on the database SMS writes route to.

    :param start_date: lower bound on MessagingEvent.date (inclusive)
    :param end_date: upper bound on MessagingEvent.date (inclusive)
    """
    db = router.db_for_write(SMS)
    with atomic(db), connections[db].cursor() as cursor:
        # 1. Detach SMS rows from subevents that are about to be deleted.
        cursor.execute(
            f"""
            UPDATE {SMS._meta.db_table} sms
            SET messaging_subevent_id = NULL
            FROM {MessagingSubEvent._meta.db_table} as subevent
            LEFT JOIN {MessagingEvent._meta.db_table} as event
                ON event.id = subevent.parent_id
            WHERE sms.messaging_subevent_id = subevent.id
                and event.date between %s and %s
            """,
            [start_date, end_date])
        # 2. Delete subevents whose parent event falls in the range.
        cursor.execute(
            f"""
            DELETE FROM {MessagingSubEvent._meta.db_table} as subevent
            USING {MessagingEvent._meta.db_table} as event
            WHERE subevent.parent_id = event.id
                AND event.date between %s and %s
            """,
            [start_date, end_date])
        metrics_counter('commcare.sms_events.deleted', cursor.rowcount,
                        tags={'type': 'sub_event'})
        # 3. Finally delete the parent events themselves.
        cursor.execute(
            f"""
            DELETE FROM {MessagingEvent._meta.db_table}
            WHERE date between %s and %s
            """,
            [start_date, end_date])
        metrics_counter('commcare.sms_events.deleted', cursor.rowcount,
                        tags={'type': 'event'})
def _reconcile_es_data(data_type, metric, blob_parent_id, start=None, end=None,
                       republish=True):
    """Find stale docs in ES, report counts, and optionally republish changes.

    Runs the ``stale_data_in_es`` management command, counts stale rows per
    domain into ``metric``, optionally republishes the doc changes, and
    archives the raw command output in the blob db.

    :param data_type: data type argument for ``stale_data_in_es``
    :param metric: name of the counter metric to emit per domain
    :param blob_parent_id: parent id / key prefix for the archived output
    :param start: ISO date string lower bound; defaults to two days ago
    :param end: optional upper bound passed through to the command
    :param republish: when True, run ``republish_doc_changes`` on the output
    """
    today = date.today()
    if not start:
        two_days_ago = today - timedelta(days=2)
        start = two_days_ago.isoformat()
    with TransientTempfile() as file_path:
        with open(file_path, 'w') as output_file:
            call_command('stale_data_in_es', data_type, start=start, end=end,
                         stdout=output_file)
        with open(file_path, 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            # ignore the headers
            next(reader)
            counts_by_domain = defaultdict(int)
            for line in reader:
                # assumes column index 3 of the TSV output is the domain --
                # TODO confirm against stale_data_in_es output format
                domain = line[3]
                counts_by_domain[domain] += 1
        if counts_by_domain:
            for domain, count in counts_by_domain.items():
                metrics_counter(metric, count, tags={'domain': domain})
        else:
            # Emit a zero so the metric still reports on clean runs.
            metrics_counter(metric, 0)
        if republish:
            call_command('republish_doc_changes', file_path, skip_domains=True)
        with open(file_path, 'rb') as f:
            blob_db = get_blob_db()
            key = f'{blob_parent_id}_{today.isoformat()}'
            # presumably blob_db.put's timeout is in minutes:
            # 60 * 24 * 365 * 6 is the number of minutes in six years --
            # TODO confirm the timeout unit
            six_years = 60 * 24 * 365 * 6
            blob_db.put(
                f,
                type_code=CODES.tempfile,
                domain='<unknown>',
                parent_id=blob_parent_id,
                key=key,
                timeout=six_years
            )
def check_repeaters():
    """Iterate waiting repeat records and attempt to forward each one.

    Guarded by a Redis lock so only one run happens at a time. Gives up
    iterating after 23 hours (just under the lock timeout), and
    soft-asserts if even a complete pass took longer than 6 hours.
    """
    start = datetime.utcnow()
    twentythree_hours_sec = 23 * 60 * 60
    twentythree_hours_later = start + timedelta(hours=23)

    # Long timeout to allow all waiting repeat records to be iterated
    check_repeater_lock = get_redis_lock(
        CHECK_REPEATERS_KEY,
        timeout=twentythree_hours_sec,
        name=CHECK_REPEATERS_KEY,
    )
    if not check_repeater_lock.acquire(blocking=False):
        metrics_counter("commcare.repeaters.check.locked_out")
        return

    try:
        with metrics_histogram_timer(
            "commcare.repeaters.check.processing",
            timing_buckets=_check_repeaters_buckets,
        ):
            for record in iterate_repeat_records(start):
                # _soft_assert appears to return the truthiness of its
                # first argument, so this notifies AND stops iterating
                # once 23 hours have passed -- confirm its contract.
                if not _soft_assert(
                    datetime.utcnow() < twentythree_hours_later,
                    "I've been iterating repeat records for 23 hours. I quit!"
                ):
                    break
                metrics_counter("commcare.repeaters.check.attempt_forward")
                record.attempt_forward_now()
            else:
                # Full pass completed (no break): warn if it was slow.
                iterating_time = datetime.utcnow() - start
                _soft_assert(
                    iterating_time < timedelta(hours=6),
                    f"It took {iterating_time} to iterate repeat records.")
    finally:
        check_repeater_lock.release()
def create_case_repeater_register(repeater, domain, payload):
    """
    Only useful in a very specific edge case for CreateCaseRepeater
    If a CreateCaseRepeater has a missing repeat record, but the case now
    contains update transactions, this can be used to properly trigger the
    missing repeat record.

    :param repeater: must be a CreateCaseRepeater instance
    :param domain: domain name the record belongs to
    :param payload: the case document to forward
    :return: the saved RepeatRecord, or None when not applicable
    """
    if not isinstance(repeater, CreateCaseRepeater):
        # Fixed message: it previously rendered as "repeater type f<type>"
        # because of a stray "f" left inside the f-string.
        logger.error(
            f"Error - cannot call create_case_repeater_register on repeater type {type(repeater)}"
        )
        return
    if not create_case_repeater_allowed_to_forward(repeater, payload):
        return

    now = datetime.utcnow()
    repeat_record = RepeatRecord(
        repeater_id=repeater.get_id,
        repeater_type=repeater.doc_type,
        domain=domain,
        registered_on=now,
        next_check=now,  # eligible for forwarding immediately
        payload_id=payload.get_id)
    metrics_counter('commcare.repeaters.new_record', tags={
        'domain': domain,
        'doc_type': repeater.doc_type
    })
    repeat_record.save()
    repeat_record.attempt_forward_now()
    return repeat_record
def log_repeater_success_in_datadog(domain, status_code, repeater_type):
    """Count one successful repeater forward, tagged by domain/status/type."""
    success_tags = {
        'domain': domain,
        'status_code': status_code,
        'repeater_type': repeater_type,
    }
    metrics_counter(REPEATER_SUCCESS_COUNT, tags=success_tags)
def mark_local_bounced_email(bounced_addresses, message_id):
    """Record a locally reported email bounce against its source record.

    ``message_id`` identifies either an Invitation email (id carries
    ``Invitation.EMAIL_ID_PREFIX``) or a MessagingSubEvent id; the matching
    record is marked as bounced. Unknown ids are silently ignored.

    :param bounced_addresses: iterable of bounced recipient email strings
    :param message_id: invitation-prefixed string or subevent id
    """
    from corehq.apps.sms.models import MessagingEvent, MessagingSubEvent
    from corehq.apps.users.models import Invitation, InvitationStatus
    if isinstance(message_id, str) and Invitation.EMAIL_ID_PREFIX in message_id:
        try:
            invite = Invitation.objects.get(
                uuid=message_id.split(Invitation.EMAIL_ID_PREFIX)[1])
        except Invitation.DoesNotExist:
            # Unknown invitation id: nothing to mark.
            pass
        else:
            invite.email_status = InvitationStatus.BOUNCED
            invite.save()
    else:
        try:
            subevent = MessagingSubEvent.objects.get(id=message_id)
        except MessagingSubEvent.DoesNotExist:
            # Unknown subevent id: nothing to mark.
            pass
        else:
            metrics_counter('commcare.messaging.email.preemptively_bounced',
                            len(bounced_addresses), tags={
                                'domain': subevent.parent.domain,
                            })
            subevent.error(
                MessagingEvent.ERROR_EMAIL_BOUNCED,
                additional_error_text=", ".join(bounced_addresses))
def log_repeater_error_in_datadog(domain, status_code, repeater_type):
    """Count one failed repeater forward, tagged by domain/status/type."""
    error_tags = {
        'domain': domain,
        'status_code': status_code,
        'repeater_type': repeater_type,
    }
    metrics_counter(REPEATER_ERROR_COUNT, tags=error_tags)
def jserror(request):
    """Record a client-side JavaScript error reported via POST.

    Parses the User-Agent header (when present) to tag the metric with
    OS, browser name/version and bot status, then returns an empty 200.
    """
    agent = request.META.get('HTTP_USER_AGENT', None)
    # Renamed from `os` to avoid shadowing the `os` module within this
    # function; behavior of the metric tags is unchanged.
    os_name = browser_name = browser_version = bot = TAG_UNKNOWN
    if agent:
        parsed_agent = httpagentparser.detect(agent)
        bot = parsed_agent.get('bot', False)
        if 'os' in parsed_agent:
            os_name = parsed_agent['os'].get('name', TAG_UNKNOWN)
        if 'browser' in parsed_agent:
            browser_version = parsed_agent['browser'].get(
                'version', TAG_UNKNOWN)
            browser_name = parsed_agent['browser'].get('name', TAG_UNKNOWN)

    metrics_counter('commcare.jserror.count', tags={
        'os': os_name,
        'browser_version': browser_version,
        'browser_name': browser_name,
        'url': sanitize_url(request.POST.get('page', None)),
        'file': request.POST.get('filename'),
        'bot': bot,
    })

    return HttpResponse('')
def log_repeater_success_in_datadog(domain, status_code, repeater_type):
    """Count one successful repeater forward, tagged by domain/status/type."""
    metrics_counter(
        'commcare.repeaters.success',
        tags={
            'repeater_type': repeater_type,
            'status_code': status_code,
            'domain': domain,
        },
    )
def _queue_chunk(indicators):
    """Mark indicators as queued and dispatch an async build task for them."""
    queued_at = datetime.utcnow()
    doc_ids = [indicator.doc_id for indicator in indicators]
    AsyncIndicator.objects.filter(doc_id__in=doc_ids).update(
        date_queued=queued_at)
    build_async_indicators.delay(doc_ids)
    metrics_counter('commcare.async_indicator.indicators_queued', len(doc_ids))
def reconcile_actions_if_necessary(self, xform):
    """Rebuild case actions when they are found to be out of order."""
    if self.check_action_order():
        return
    metrics_counter("commcare.form_processor.couch.reconcile_actions")
    try:
        self.reconcile_actions(rebuild=True, xforms={xform.form_id: xform})
    except ReconciliationError:
        # Reconciliation is best-effort; failure here is tolerated.
        pass
def send_first_message(domain, recipient, phone_entry_or_number, session,
                       responses, logged_subevent, workflow):
    """Send the opening message(s) of an SMS forms session.

    Retries itself (as an async task, 60s later) if the phone channel is
    claimed by another session when one-number-multiple-contacts is on.
    """
    # This try/except section is just here (temporarily) to support future refactors
    # If any of these notify, they should be replaced with a comment as to why the two are different
    # so that someone refactoring in the future will know that this or that param is necessary.
    try:
        if session.workflow != workflow:
            # see if we can eliminate the workflow arg
            notify_error('Exploratory: session.workflow != workflow', details={
                'session.workflow': session.workflow, 'workflow': workflow})
        if session.connection_id != recipient.get_id:
            # see if we can eliminate the recipient arg
            notify_error('Exploratory: session.connection_id != recipient.get_id', details={
                'session.connection_id': session.connection_id,
                'recipient.get_id': recipient.get_id,
                'recipient': recipient
            })
        if session.related_subevent != logged_subevent:
            # see if we can eliminate the logged_subevent arg
            notify_error('Exploratory: session.related_subevent != logged_subevent', details={
                'session.connection_id': session.connection_id,
                'logged_subevent': logged_subevent})
    except Exception:
        # The above running is not mission critical, so if it errors just leave a message in the log
        # for us to follow up on.
        # Absence of the message below and messages above ever notifying
        # will indicate that we can remove these args.
        notify_exception(None, "Error in section of code that's just supposed help inform future refactors")

    if toggles.ONE_PHONE_NUMBER_MULTIPLE_CONTACTS.enabled(domain):
        if not XFormsSessionSynchronization.claim_channel_for_session(session):
            # Channel is busy: retry this whole task in a minute.
            send_first_message.apply_async(
                args=(domain, recipient, phone_entry_or_number, session,
                      responses, logged_subevent, workflow),
                countdown=60
            )
            return

    metrics_counter('commcare.smsforms.session_started', 1,
                    tags={'domain': domain, 'workflow': workflow})

    if len(responses) > 0:
        message = format_message_list(responses)
        metadata = MessageMetadata(
            workflow=workflow,
            xforms_session_couch_id=session.couch_id,
        )
        # A PhoneNumber entry means the number is verified; otherwise we
        # fall back to sending to the raw number.
        if isinstance(phone_entry_or_number, PhoneNumber):
            send_sms_to_verified_number(
                phone_entry_or_number,
                message,
                metadata,
                logged_subevent=logged_subevent
            )
        else:
            send_sms(
                domain,
                recipient,
                phone_entry_or_number,
                message,
                metadata
            )
    logged_subevent.completed()
def get_context(self, record):
    """Build the template context for an error report from a log record.

    Extracts a reversed-order traceback from ``record.exc_info`` when
    present, sanitizes the attached request, and records a per-URL error
    metric for requests.

    :param record: a logging record, possibly carrying ``request`` and
        ``exc_info`` attributes
    :return: defaultdict (missing keys render as '') with subject, message,
        stack trace and request details
    """
    from corehq.util.metrics import metrics_counter
    try:
        request = record.request
    except Exception:
        # Records without an attached request are fine.
        request = None
    request_repr = get_sanitized_request_repr(request)

    tb_list = []
    if record.exc_info:
        etype, _value, tb = record.exc_info
        value = clean_exception(_value)
        # Frames are rendered most-recent-call-first (the reverse of
        # Python's default traceback ordering).
        tb_list = ['Traceback (most recent call first):\n']
        formatted_exception = traceback.format_exception_only(etype, value)
        tb_list.extend(formatted_exception)
        extracted_tb = list(reversed(traceback.extract_tb(tb)))
        tb_list.extend(traceback.format_list(extracted_tb))
        stack_trace = '\n'.join(tb_list)
        subject = '%s: %s' % (record.levelname,
                              formatted_exception[0].strip()
                              if formatted_exception
                              else record.getMessage())
    else:
        stack_trace = 'No stack trace available'
        subject = '%s: %s' % (record.levelname, record.getMessage())

    context = defaultdict(lambda: '')
    context.update({
        'subject': self.format_subject(subject),
        'message': record.getMessage(),
        'details': getattr(record, 'details', None),
        'tb_list': tb_list,
        'request_repr': request_repr,
        'stack_trace': stack_trace,
    })
    if request:
        sanitized_url = sanitize_url(request.build_absolute_uri())
        metrics_counter('commcare.error.count', tags={
            'url': sanitized_url,
            'group': get_url_group(sanitized_url),
            'domain': getattr(request, 'domain', TAG_UNKNOWN),
        })
        context.update({
            'get': list(request.GET.items()),
            'post': SafeExceptionReporterFilter().get_post_parameters(request),
            'method': request.method,
            'username': request.user.username
                if getattr(request, 'user', None) else "",
            'url': request.build_absolute_uri(),
        })
    return context
def maybe_not_found(throw=None):
    """Suppress "not found" ClientErrors, optionally raising ``throw`` instead.

    Any other ClientError propagates unchanged. Each suppressed not-found
    error is counted in metrics.
    """
    try:
        yield
    except ClientError as client_error:
        if is_not_found(client_error):
            metrics_counter('commcare.blobdb.notfound')
            if throw is not None:
                raise throw
        else:
            raise