def run(self):
    '''
    Starts the import/export job by calling the subclass' `_run_task()`
    method. Catches all exceptions raised by `_run_task()`! Suitable to be
    called by an asynchronous task runner (Celery)

    Returns early (without running anything) when the stored status is
    already `COMPLETE`.

    :raises Exception: if the stored status is neither `CREATED` nor
        `COMPLETE` (possibly because a concurrent task picked the job up).
    '''
    with transaction.atomic():
        # Re-read the status from the database instead of trusting the
        # possibly stale in-memory value
        _refetched_self = self._meta.model.objects.get(pk=self.pk)
        self.status = _refetched_self.status
        del _refetched_self
        if self.status == self.COMPLETE:
            return
        elif self.status != self.CREATED:
            # possibly a concurrent task?
            raise Exception(
                'only recently created {}s can be executed'.format(
                    self._meta.model_name))
        self.status = self.PROCESSING
        self.save(update_fields=['status'])

    msgs = defaultdict(list)
    try:
        # This method must be implemented by a subclass
        self._run_task(msgs)
        self.status = self.COMPLETE
    # `except Exception, err` is a syntax error on Python 3, and exceptions
    # no longer have a `.message` attribute; `str(err)` is the portable form
    except Exception as err:
        msgs['error_type'] = type(err).__name__
        msgs['error'] = str(err)
        self.status = self.ERROR
        logging.error(
            'Failed to run %s: %s' % (self._meta.model_name, repr(err)),
            exc_info=True)
    # NOTE(review): neither `msgs` nor the final status is persisted within
    # this block - confirm that this is handled elsewhere
def send(self):
    """
    Posts `self._data` to the hook's endpoint and records the outcome with
    `save_log()`.

    :return: bool. `True` only when the remote server answered without an
        HTTP error status
    """
    if not self._data:
        self.save_log(KOBO_INTERNAL_ERROR_STATUS_CODE, "No data available")
        return False

    # Must exist before `requests.post` is called: the `RequestException`
    # handler inspects it to know whether a response was received at all
    response = None
    try:
        request_kwargs = self._prepare_request_kwargs()

        # Custom headers first, then the user agent on top of them
        headers = request_kwargs.get("headers")
        headers.update(self._hook.settings.get("custom_headers", {}))

        domain_name = os.getenv("PUBLIC_DOMAIN_NAME")
        public_domain = "- {} ".format(domain_name) if domain_name else ""
        user_agent = "KoBoToolbox external service {}#{}".format(
            public_domain, self._hook.uid)
        request_kwargs.get("headers").update({"User-Agent": user_agent})

        # Provide username & password when the hook uses basic
        # authentication
        if self._hook.auth_level == Hook.BASIC_AUTH:
            credentials = (self._hook.settings.get("username"),
                           self._hook.settings.get("password"))
            request_kwargs.update({"auth": credentials})

        response = requests.post(
            self._hook.endpoint, timeout=30, **request_kwargs)
        response.raise_for_status()
        self.save_log(response.status_code, response.text, True)
        return True
    except requests.exceptions.RequestException as e:
        # When the remote server could not be reached, the exception is
        # raised before `requests.post` returns, so `response` is still
        # `None`
        if response is None:
            status_code = KOBO_INTERNAL_ERROR_STATUS_CODE
            text = str(e)
        else:
            text = response.text
            status_code = response.status_code
        self.save_log(status_code, text)
    except Exception as e:
        error_message = (
            "service_json.ServiceDefinition.send - Hook #{} - Data #{} - {}"
            .format(self._hook.uid, self._instance_id, str(e)))
        logging.error(error_message, exc_info=True)
        self.save_log(
            KOBO_INTERNAL_ERROR_STATUS_CODE,
            "An error occurred when sending data to external endpoint")

    return False
def get_enketo_survey_links(self):
    """
    Asks Enketo for the survey links of this deployment.

    Returns the decoded JSON response without the `enketo_id`, `code` and
    `preview_iframe_url` keys, or an empty dict when the request fails or
    the response is not valid JSON.
    """
    payload = {
        'server_url': u'{}/{}'.format(
            settings.KOBOCAT_URL.rstrip('/'),
            self.asset.owner.username
        ),
        'form_id': self.backend_response['id_string'],
    }
    endpoint_url = u'{}{}'.format(
        settings.ENKETO_SERVER, settings.ENKETO_SURVEY_ENDPOINT)
    # bare tuple implies basic auth
    credentials = (settings.ENKETO_API_TOKEN, '')

    try:
        response = requests.post(endpoint_url, auth=credentials, data=payload)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        # Don't 500 the entire asset view if Enketo is unreachable
        logging.error('Failed to retrieve links from Enketo', exc_info=True)
        return {}

    # Kept separate from the request above so that an undecodable body is
    # always reported with its own message
    try:
        links = response.json()
    except ValueError:
        logging.error('Received invalid JSON from Enketo', exc_info=True)
        return {}

    unwanted_keys = ('enketo_id', 'code', 'preview_iframe_url')
    for unwanted_key in unwanted_keys:
        try:
            del links[unwanted_key]
        except KeyError:
            # Nothing to discard
            continue
    return links
def generate_xml_from_source(self, source, include_note=False,
                             root_node_name='snapshot_xml',
                             form_title=None, id_string=None):
    """
    Builds the XML of `source` with `FormPack`.

    When `include_note` is truthy and `source` has a `survey`, a note row
    labelled with `include_note` is appended to `source['survey']` (the
    caller's object is modified). All further processing is done on a deep
    copy of `source`.

    Returns a `(xml, details)` tuple. On failure, `xml` is an empty string
    and `details` carries `status`, `error_type`, `error` and `warnings`;
    on success it carries `status` and `warnings`.
    """
    form_title = 'Snapshot XML' if form_title is None else form_title
    id_string = 'snapshot_xml' if id_string is None else id_string

    if include_note and 'survey' in source:
        translations = source.get('translations', [])
        if len(translations) > 0:
            # One copy of the label per translation
            note_label = [include_note] * len(translations)
        else:
            note_label = include_note
        source['survey'].append({
            'type': 'note',
            'name': 'prepended_note',
            'label': note_label,
        })

    working_copy = copy.deepcopy(source)
    self._expand_kobo_qs(working_copy)
    self._populate_fields_with_autofields(working_copy)
    self._strip_kuids(working_copy)

    warnings = []
    try:
        pack = FormPack(
            {'content': working_copy},
            root_node_name=root_node_name,
            id_string=id_string,
            title=form_title,
        )
        xml = pack[0].to_xml(warnings=warnings)
    except Exception as err:
        err_message = str(err)
        log_context = {
            'src': source,
            'id_string': id_string,
            'uid': self.uid,
            '_msg': err_message,
            'warnings': warnings,
        }
        logging.error('Failed to generate xform for asset', extra=log_context)
        failure_details = {
            'status': 'failure',
            'error_type': type(err).__name__,
            'error': err_message,
            'warnings': warnings,
        }
        return '', failure_details

    return xml, {'status': 'success', 'warnings': warnings}
def _get_data(self):
    """
    Fetches the submission from the deployment backend of the hook's asset
    and returns it parsed by `_parse_data()`.

    Any exception is logged and `None` is returned instead.
    """
    try:
        deployment = self._hook.asset.deployment
        submission = deployment.get_submission(
            self._instance_id, self._hook.export_type)
        parsed_data = self._parse_data(submission, self._hook.subset_fields)
    except Exception as e:
        error_message = (
            "service_json.ServiceDefinition._get_data - "
            "Hook #{} - Data #{} - {}"
        ).format(self._hook.uid, self._instance_id, str(e))
        logging.error(error_message, exc_info=True)
        return None
    return parsed_data
def run(self):
    """
    Starts the import/export job by calling the subclass' `_run_task()`
    method. Catches all exceptions raised by `_run_task()`! Suitable to be
    called by an asynchronous task runner (Celery)

    Returns early (without running anything) when the stored status is
    already `COMPLETE`. Otherwise the final status, the collected messages
    and the processing time are saved once the task has finished.

    :raises Exception: if the stored status is neither `CREATED` nor
        `COMPLETE` (possibly because a concurrent task picked the job up).
    """
    with transaction.atomic():
        # Re-read the status from the database instead of trusting the
        # possibly stale in-memory value
        _refetched_self = self._meta.model.objects.get(pk=self.pk)
        self.status = _refetched_self.status
        del _refetched_self
        if self.status == self.COMPLETE:
            return
        elif self.status != self.CREATED:
            # possibly a concurrent task?
            raise Exception(
                'only recently created {}s can be executed'.format(
                    self._meta.model_name))
        self.status = self.PROCESSING
        self.save(update_fields=['status'])

    msgs = defaultdict(list)
    try:
        # This method must be implemented by a subclass
        self._run_task(msgs)
        self.status = self.COMPLETE
    except ExportTaskBase.InaccessibleData as e:
        msgs['error_type'] = t('Cannot access data')
        # This used to assign to the undefined name `msg`, which raised a
        # `NameError` out of the handler and skipped the final save below
        msgs['error'] = str(e)
        self.status = self.ERROR
    # TODO: continue to make more specific exceptions as above until this
    # catch-all can be removed entirely
    except Exception as err:
        msgs['error_type'] = type(err).__name__
        msgs['error'] = str(err)
        self.status = self.ERROR
        logging.error(
            'Failed to run %s: %s' % (self._meta.model_name, repr(err)),
            exc_info=True)

    self.messages.update(msgs)
    # Record the processing time for diagnostic purposes
    self.data['processing_time_seconds'] = (
        datetime.datetime.now(self.date_created.tzinfo) - self.date_created
    ).total_seconds()
    try:
        self.save(update_fields=['status', 'messages', 'data'])
    except TypeError as e:
        # `messages` and `data` could not be saved: record the failure and
        # persist at least the status
        self.status = self.ERROR
        logging.error(
            'Failed to save %s: %s' % (self._meta.model_name, repr(e)),
            exc_info=True)
        self.save(update_fields=['status'])
def retry(self):
    """
    Sends the data to the external service again, then reloads this log
    from the database.

    If anything raises, the error is logged and the status is changed to
    `HOOK_LOG_FAILED`.

    :return: boolean. `False` if an exception was raised, `True` otherwise
    """
    try:
        service_definition_class = self.hook.get_service_definition()
        service_definition = service_definition_class(
            self.hook, self.instance_id)
        service_definition.send()
        self.refresh_from_db()
    except Exception as e:
        logging.error("HookLog.retry - {}".format(str(e)), exc_info=True)
        self.change_status(HOOK_LOG_FAILED)
        return False
    else:
        return True
def _get_data(self):
    """
    Fetches the submission, on behalf of the asset's owner, from the
    deployment backend of the hook's asset and returns it parsed by
    `_parse_data()`.

    Any exception is logged and `None` is returned instead.
    """
    hook = self._hook
    try:
        deployment = hook.asset.deployment
        submission = deployment.get_submission(
            self._submission_id,
            user=hook.asset.owner,
            format_type=hook.export_type,
        )
        parsed_data = self._parse_data(submission, hook.subset_fields)
    except Exception as e:
        error_message = (
            'service_json.ServiceDefinition._get_data: '
            f'Hook #{self._hook.uid} - Data #{self._submission_id} - '
            f'{str(e)}'
        )
        logging.error(error_message, exc_info=True)
        return None
    return parsed_data
def __get_realm(request):
    """
    Return the realm name to use for `request`.

    The allowed-connections endpoint under `settings.OC_BUILD_URL` is
    queried with the request's subdomain and the first item of its JSON
    response is returned. Whenever that lookup cannot be completed (request
    error, unexpected response object, undecodable or empty payload), the
    subdomain itself is returned.
    """
    subdomain = get_subdomain(request)
    allowed_connections_url = (
        '{}/customer-service/api/allowed-connections'.format(
            settings.OC_BUILD_URL)
    )

    try:
        allowed_connections_response = requests.get(
            allowed_connections_url,
            params={'subdomain': subdomain},
        )
    except Exception as e:
        kpi_logging.error("oc_views {}".format(str(e)), exc_info=True)
        return subdomain

    if not isinstance(allowed_connections_response, Response):
        return subdomain

    try:
        return allowed_connections_response.json()[0]
    except (ValueError, LookupError, TypeError) as e:
        # A non-JSON body, an empty list or an unexpected payload shape used
        # to escape from here; fall back on the subdomain like the request
        # failure above does
        kpi_logging.error("oc_views {}".format(str(e)), exc_info=True)
        return subdomain
def save_log(self, status_code: int, message: str, success: bool = False):
    """
    Updates (or creates) the `HookLog` identified by this hook and
    submission id.

    - `status_code` is stored as the log's status code
    - `message` is stored as the log's message; when it is not valid JSON,
      HTML tags are replaced with spaces first
    - `success` marks the log as successful; otherwise the log is marked as
      failed once its `tries` reach `HOOK_MAX_RETRIES`

    Errors raised while saving are logged, not propagated.
    """
    # The log `uid` is unknown in this context, so look the log up with
    # its `hook` FK and its `submission_id`
    lookup = {
        'hook': self._hook,
        'submission_id': self._submission_id,
    }
    try:
        hook_log = HookLog.objects.get(**lookup)
    except HookLog.DoesNotExist:
        hook_log = HookLog(**lookup)

    if success:
        hook_log.status = HOOK_LOG_SUCCESS
    elif hook_log.tries >= constance.config.HOOK_MAX_RETRIES:
        hook_log.status = HOOK_LOG_FAILED
    hook_log.status_code = status_code

    # Valid JSON is kept as is. Anything else should be HTML (or
    # plaintext), in which case tags can be removed
    try:
        json.loads(message)
    except ValueError:
        cleaned_message = re.sub(r"<[^>]*>", " ", message).strip()
    else:
        cleaned_message = message
    hook_log.message = cleaned_message

    try:
        hook_log.save()
    except Exception as e:
        logging.error(
            f'ServiceDefinitionInterface.save_log - {str(e)}',
            exc_info=True,
        )
def save_log(self, status_code, message, success=False):
    """
    Updates (or creates) the `HookLog` identified by this hook and
    instance id.

    Errors raised while saving are logged, not propagated.

    :param status_code: int. HTTP status code
    :param message: str. Stored as is when it is valid JSON; otherwise HTML
        tags are replaced with spaces first
    :param success: bool. Marks the log as successful. Otherwise, the log is
        marked as failed once its `tries` reach `HOOK_MAX_RETRIES`
    """
    # The log `uid` is unknown in this context, so look the log up with
    # its `hook` FK and its `instance_id`
    lookup = {
        "hook": self._hook,
        "instance_id": self._instance_id,
    }
    try:
        hook_log = HookLog.objects.get(**lookup)
    except HookLog.DoesNotExist:
        hook_log = HookLog(**lookup)

    if success:
        hook_log.status = HOOK_LOG_SUCCESS
    elif hook_log.tries >= constance.config.HOOK_MAX_RETRIES:
        hook_log.status = HOOK_LOG_FAILED
    hook_log.status_code = status_code

    # Valid JSON is kept as is. Anything else should be HTML (or
    # plaintext), in which case tags can be removed
    try:
        json.loads(message)
    except ValueError:
        cleaned_message = re.sub(r"<[^>]*>", " ", message).strip()
    else:
        cleaned_message = message
    hook_log.message = cleaned_message

    try:
        hook_log.save()
    except Exception as e:
        error_message = "ServiceDefinitionInterface.save_log - {}".format(
            str(e))
        logging.error(error_message, exc_info=True)
def get_mp3_content(self) -> bytes:
    """
    Run ffmpeg on the file located at `self.absolute_path` and return what
    it wrote to its standard output, using `self.CONVERSION_AUDIO_FORMAT` as
    the output format.

    Raises `NotImplementedError` when the object lacks `mimetype` or
    `absolute_path`, `NotSupportedFormatException` when the mimetype is
    neither audio nor video, and `FFMpegException` when ffmpeg exits with a
    non-zero return code.
    """
    required_attributes = ('mimetype', 'absolute_path')
    if not all(hasattr(self, name) for name in required_attributes):
        raise NotImplementedError(
            'Parent class does not implement `mimetype` or `absolute_path')

    if not self.mimetype.startswith(('audio', 'video')):
        raise NotSupportedFormatException

    # `pipe:1` makes ffmpeg write the converted content to stdout
    completed = subprocess.run(
        [
            '/usr/bin/ffmpeg',
            '-i', self.absolute_path,
            '-f', self.CONVERSION_AUDIO_FORMAT,
            'pipe:1',
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if not completed.returncode:
        return completed.stdout

    logging.error(f'ffmpeg error: {completed.stderr}')
    raise FFMpegException
def get_submissions(self, format_type=INSTANCE_FORMAT_TYPE_JSON,
                    instances_ids=None):
    """
    Retrieves submissions by delegating to the private
    `__get_submissions_in_<format_type>` method of this class.
    It can be filtered on instances uuids.
    `uuid` is used instead of `id` because `id` is not available in
    ReadOnlyInstance model

    Exceptions raised by the delegate are logged and an empty list is
    returned instead.

    :param format_type: str.
        INSTANCE_FORMAT_TYPE_JSON|INSTANCE_FORMAT_TYPE_XML
    :param instances_ids: list. Optional
    :return: list: mixed
    """
    # A fresh list per call: a `[]` default would be shared by every call
    if instances_ids is None:
        instances_ids = []

    # The delegate is a name-mangled private method, hence the class name
    # in the attribute name
    getter = getattr(
        self,
        "_{}__get_submissions_in_{}".format(
            self.__class__.__name__, format_type))
    try:
        return getter(instances_ids)
    except Exception as e:
        logging.error(
            "KobocatDeploymentBackend.get_submissions - {}".format(str(e)),
            exc_info=True)
        return []
def _import_user_assets(from_user, to_user):
    """
    Create an `Asset` owned by `to_user` for each of that user's survey
    drafts which has not been migrated yet.

    Drafts without an `asset_type` are imported as surveys; the others are
    imported as blocks inside the user's "question library" collection. A
    draft that fails to import is logged and skipped.

    `from_user` is not used by this function.
    """
    user = to_user

    # now, if a user wants to re-import, they can delete the asset from kpi
    # and re-run management command
    already_migrated_sds = user.survey_drafts.exclude(kpi_asset_uid='')
    for migrated_sd in already_migrated_sds.all():
        _kpi_uid = migrated_sd.kpi_asset_uid
        if Asset.objects.filter(uid=_kpi_uid).count() == 0:
            migrated_sd.kpi_asset_uid = ''
            migrated_sd.save()

    not_already_migrated = user.survey_drafts.filter(kpi_asset_uid='')
    user_survey_drafts = not_already_migrated.filter(asset_type=None)
    user_qlib_assets = not_already_migrated.exclude(asset_type=None)

    def _import_asset(asset, parent_collection=None, asset_type='survey'):
        # Create the new `Asset` from the draft, keeping the draft's dates
        survey_dict = _csv_to_dict(asset.body)
        obj = {
            'name': asset.name,
            'date_created': asset.date_created,
            'date_modified': asset.date_modified,
            'asset_type': asset_type,
            'owner': user,
        }

        if parent_collection is not None:
            obj['parent'] = parent_collection
            del obj['name']
        new_asset = Asset(**obj)

        _set_auto_field_update(Asset, "date_created", False)
        _set_auto_field_update(Asset, "date_modified", False)
        try:
            new_asset.content = survey_dict
            new_asset.date_created = obj['date_created']
            new_asset.date_modified = obj['date_modified']
            new_asset.save()
        finally:
            # Restore the automatic dates even when saving fails; the
            # callers below log the failure and keep going
            _set_auto_field_update(Asset, "date_created", True)
            _set_auto_field_update(Asset, "date_modified", True)

        # Note on the old draft the uid of the new asset
        asset.kpi_asset_uid = new_asset.uid
        asset.save()

        return new_asset

    # `except Exception` lets `KeyboardInterrupt` and `SystemExit` through,
    # so the migration can still be interrupted
    for survey_draft in user_survey_drafts.all():
        try:
            _import_asset(survey_draft, asset_type='survey')
        except Exception:
            message = (u'Failed to migrate survey draft with name="{}" '
                       u'and pk={}').format(survey_draft.name,
                                            survey_draft.pk)
            logging.error(message, exc_info=True)

    (qlib, _) = Collection.objects.get_or_create(name="question library",
                                                 owner=user)

    for qlib_asset in user_qlib_assets.all():
        try:
            _import_asset(qlib_asset, qlib, asset_type='block')
        except Exception:
            message = (u'Failed to migrate library asset with name="{}" '
                       u'and pk={}').format(qlib_asset.name, qlib_asset.pk)
            logging.error(message, exc_info=True)

    # NOTE(review): `qlib` is a `Collection`, yet the automatic dates are
    # toggled on `Asset` - confirm whether `Collection` was intended
    _set_auto_field_update(Asset, "date_created", False)
    _set_auto_field_update(Asset, "date_modified", False)
    try:
        qlib.date_created = user.date_joined
        qlib.date_modified = user.date_joined
        qlib.save()
    finally:
        _set_auto_field_update(Asset, "date_created", True)
        _set_auto_field_update(Asset, "date_modified", True)
class ImportExportTask(models.Model):
    '''
    A common base model for asynchronous import and exports. Must be
    subclassed to be useful. Subclasses must implement the `_run_task()`
    method
    '''

    class Meta:
        abstract = True

    # Possible values of `status`
    CREATED = 'created'
    PROCESSING = 'processing'
    COMPLETE = 'complete'
    ERROR = 'error'

    STATUS_CHOICES = (
        (CREATED, CREATED),
        (PROCESSING, PROCESSING),
        (ERROR, ERROR),
        (COMPLETE, COMPLETE),
    )

    user = models.ForeignKey('auth.User')
    # `run()` stores `processing_time_seconds` in here
    data = JSONField()
    # `run()` merges the messages collected by `_run_task()` in here
    # NOTE(review): `default={}` is a single dict object; a callable such as
    # `dict` is the usual form, but switching may require a migration in
    # subclasses - confirm before changing
    messages = JSONField(default={})
    status = models.CharField(choices=STATUS_CHOICES, max_length=32,
                              default=CREATED)
    date_created = models.DateTimeField(auto_now_add=True)
    # date_expired = models.DateTimeField(null=True)

    def run(self):
        '''
        Starts the import/export job by calling the subclass' `_run_task()`
        method. Catches all exceptions raised by `_run_task()`! Suitable to
        be called by an asynchronous task runner (Celery)

        Returns early (without running anything) when the stored status is
        already `COMPLETE`. Otherwise the final status, the collected
        messages and the processing time are saved once the task has
        finished.

        :raises Exception: if the stored status is neither `CREATED` nor
            `COMPLETE` (possibly because a concurrent task picked the job
            up).
        '''
        with transaction.atomic():
            # Re-read the status from the database instead of trusting the
            # possibly stale in-memory value
            _refetched_self = self._meta.model.objects.get(pk=self.pk)
            self.status = _refetched_self.status
            del _refetched_self
            if self.status == self.COMPLETE:
                return
            elif self.status != self.CREATED:
                # possibly a concurrent task?
                raise Exception(
                    'only recently created {}s can be executed'.format(
                        self._meta.model_name))
            self.status = self.PROCESSING
            self.save(update_fields=['status'])

        msgs = defaultdict(list)
        try:
            # This method must be implemented by a subclass
            self._run_task(msgs)
            self.status = self.COMPLETE
        # `except Exception, err` is a syntax error on Python 3, and
        # exceptions no longer have a `.message` attribute; `str(err)` is
        # the portable form
        except Exception as err:
            msgs['error_type'] = type(err).__name__
            msgs['error'] = str(err)
            self.status = self.ERROR
            logging.error(
                'Failed to run %s: %s' % (self._meta.model_name, repr(err)),
                exc_info=True)

        self.messages.update(msgs)
        # Record the processing time for diagnostic purposes
        self.data['processing_time_seconds'] = (
            datetime.datetime.now(self.date_created.tzinfo) -
            self.date_created
        ).total_seconds()
        try:
            self.save(update_fields=['status', 'messages', 'data'])
        except TypeError as e:
            # `messages` and `data` could not be saved: record the failure
            # and persist at least the status
            self.status = self.ERROR
            logging.error(
                'Failed to save %s: %s' % (self._meta.model_name, repr(e)),
                exc_info=True)
            self.save(update_fields=['status'])
def failures_reports():
    """
    Notifies assets' owners by email of hooks failures.

    Failed `HookLog` entries whose hook has `email_notification` enabled
    (and, when the periodic task has a `last_run_at`, which were modified
    since then) are grouped by owner and by asset. One e-mail is built per
    owner and all of them are sent through a single connection.

    :return: bool. `False` if sending the e-mails raised an exception,
        `True` otherwise (including when no enabled periodic task is found)
    """
    beat_schedule = settings.CELERY_BEAT_SCHEDULE.get(
        "send-hooks-failures-reports")
    # Use `.first()` instead of `.get()`, because task can be duplicated in
    # admin section
    failures_reports_period_task = PeriodicTask.objects.filter(
        enabled=True,
        task=beat_schedule.get('task')).order_by("-last_run_at").first()

    if not failures_reports_period_task:
        return True

    last_run_at = failures_reports_period_task.last_run_at
    queryset = HookLog.objects.filter(hook__email_notification=True,
                                      status=HOOK_LOG_FAILED)
    if last_run_at:
        queryset = queryset.filter(date_modified__gte=last_run_at)

    queryset = queryset.order_by('hook__asset__name',
                                 'hook__uid',
                                 '-date_modified')

    # PeriodicTask are updated every 3 minutes (default).
    # It means, if this task interval is less than 3 minutes, some data can
    # be duplicated in emails.
    # Setting `beat-sync-every` to 1, makes PeriodicTask to be updated
    # before running the task.
    # So, we need to update it manually.
    # see: http://docs.celeryproject.org/en/latest/userguide/configuration.html#beat-sync-every
    PeriodicTask.objects.filter(task=beat_schedule.get("task")). \
        update(last_run_at=timezone.now())

    records = {}

    # Prepare data for templates.
    # All logs will be grouped under their respective asset and user.
    for record in queryset:
        asset = record.hook.asset
        owner = asset.owner

        # if users don't exist in dict, add them
        if owner.id not in records:
            records[owner.id] = {
                'username': owner.username,
                # language is not implemented yet.
                # TODO add language to user table in registration process
                'language': getattr(owner, 'language', 'en'),
                'email': owner.email,
                'assets': {}
            }
        owner_assets = records[owner.id]['assets']

        # if asset doesn't exist in user's asset dict, add it
        if asset.uid not in owner_assets:
            owner_assets[asset.uid] = {
                'name': asset.name,
                'max_length': 0,
                'logs': []
            }
        asset_entry = owner_assets[asset.uid]

        # Add log to corresponding asset and user
        asset_entry['logs'].append({
            'hook_name': record.hook.name,
            'uid': record.uid,
            'date_modified': record.date_modified,
            'status_code': record.status_code,
            'message': record.message
        })

        # Max Length is used for plain text template. To display fixed size
        # columns.
        # It is tracked on the asset entry itself: a single shared counter
        # gave wrong widths whenever the rows of two assets were interleaved
        # (e.g. assets sharing the same name)
        asset_entry['max_length'] = max(asset_entry['max_length'],
                                        len(record.hook.name))

    # Get templates
    plain_text_template = get_template('reports/failures_email_body.txt')
    html_template = get_template('reports/failures_email_body.html')
    email_messages = []

    for record in records.values():
        variables = {
            'username': record.get('username'),
            'assets': record.get('assets'),
            'kpi_base_url': settings.KPI_URL
        }
        # Localize templates. `override()` restores the previously active
        # language afterwards, so the owner's language does not leak into
        # whatever runs next in this worker
        with translation.override(record.get("language")):
            text_content = plain_text_template.render(variables)
            html_content = html_template.render(variables)

            msg = EmailMultiAlternatives(
                translation.ugettext('REST Services Failure Report'),
                text_content,
                constance.config.SUPPORT_EMAIL,
                [record.get('email')])
        msg.attach_alternative(html_content, 'text/html')
        email_messages.append(msg)

    # Send email messages
    if email_messages:
        try:
            with get_connection() as connection:
                connection.send_messages(email_messages)
        except Exception as e:
            logging.error('failures_reports - {}'.format(str(e)),
                          exc_info=True)
            return False

    return True
def send(self):
    """
    Posts `self._data` to the hook's endpoint, after validating the
    endpoint with `SSRFProtect`, and records the outcome with `save_log()`.

    :return: bool. `True` only when the remote server answered without an
        HTTP error status
    """
    success = False
    # Need to declare response before requests.post assignment in case of
    # RequestException
    response = None
    if self._data:
        try:
            request_kwargs = self._prepare_request_kwargs()

            # Add custom headers
            request_kwargs.get("headers").update(
                self._hook.settings.get("custom_headers", {}))

            # Add user agent
            public_domain = "- {} ".format(os.getenv("PUBLIC_DOMAIN_NAME")) \
                if os.getenv("PUBLIC_DOMAIN_NAME") else ""
            request_kwargs.get("headers").update({
                "User-Agent": "KoboToolbox external service {}#{}".format(
                    public_domain,
                    self._hook.uid)
            })

            # If the request needs basic authentication with username and
            # password, let's provide them
            if self._hook.auth_level == Hook.BASIC_AUTH:
                request_kwargs.update({
                    "auth": (self._hook.settings.get("username"),
                             self._hook.settings.get("password"))
                })

            # One IP address per line. `splitlines()` accepts `\n` as well
            # as `\r\n`; splitting on `'\r\n'` only turned an LF-separated
            # setting into one bogus address. Blank lines are skipped.
            ssrf_protect_options = {}
            ip_address_settings = (
                ('allowed_ip_addresses',
                 constance.config.SSRF_ALLOWED_IP_ADDRESS),
                ('denied_ip_addresses',
                 constance.config.SSRF_DENIED_IP_ADDRESS),
            )
            for option_name, raw_value in ip_address_settings:
                ip_addresses = [
                    line.strip()
                    for line in raw_value.splitlines()
                    if line.strip()
                ]
                if ip_addresses:
                    ssrf_protect_options[option_name] = ip_addresses

            SSRFProtect.validate(self._hook.endpoint,
                                 options=ssrf_protect_options)

            response = requests.post(self._hook.endpoint, timeout=30,
                                     **request_kwargs)
            response.raise_for_status()
            self.save_log(response.status_code, response.text, True)
            success = True
        except requests.exceptions.RequestException as e:
            # If request fails to communicate with remote server.
            # Exception is raised before request.post can return something.
            # Thus, response equals None
            status_code = KOBO_INTERNAL_ERROR_STATUS_CODE
            text = str(e)
            if response is not None:
                text = response.text
                status_code = response.status_code
            self.save_log(status_code, text)
        except SSRFProtectException as e:
            logging.error(
                'service_json.ServiceDefinition.send: '
                f'Hook #{self._hook.uid} - '
                f'Data #{self._submission_id} - '
                f'{str(e)}',
                exc_info=True)
            self.save_log(
                KOBO_INTERNAL_ERROR_STATUS_CODE,
                f'{self._hook.endpoint} is not allowed')
        except Exception as e:
            logging.error(
                'service_json.ServiceDefinition.send: '
                f'Hook #{self._hook.uid} - '
                f'Data #{self._submission_id} - '
                f'{str(e)}',
                exc_info=True)
            self.save_log(
                KOBO_INTERNAL_ERROR_STATUS_CODE,
                "An error occurred when sending data to external endpoint")
    else:
        self.save_log(
            KOBO_INTERNAL_ERROR_STATUS_CODE,
            "No data available")

    return success
def build_formpack(asset, submission_stream=None, use_all_form_versions=True):
    """
    Build a `FormPack` for `asset` and return it along with an iterable of
    the asset's submissions, as a `(pack, submission_stream)` tuple.

    Each submission yielded by the returned stream gets an
    `__inferred_version__` key telling which version it belongs to.

    When `use_all_form_versions` is `False`, only the newest deployed
    version of the form is used and every submission is assumed to have been
    collected with it.

    When `submission_stream` is `None`, the submissions are requested from
    the asset's deployment on behalf of the asset's owner.
    """
    FUZZY_VERSION_ID_KEY = '_version_'
    INFERRED_VERSION_ID_KEY = '__inferred_version__'

    if not asset.has_deployment:
        raise Exception('Cannot build formpack for asset without deployment')

    _versions = (
        asset.deployed_versions
        if use_all_form_versions
        else [asset.deployed_versions.first()]
    )

    schemas = []
    version_ids_newest_first = []
    for version in _versions:
        try:
            schema = version.to_formpack_schema()
        # FIXME: should FormPack validation errors have their own
        # exception class?
        except TypeError as e:
            # https://github.com/kobotoolbox/kpi/issues/1361
            logging.error(
                'Failed to get formpack schema for version: %s' % repr(e),
                exc_info=True
            )
            continue
        schema['version_id_key'] = INFERRED_VERSION_ID_KEY
        schemas.append(schema)
        version_ids_newest_first.append(version.uid)
        if version.uid_aliases:
            version_ids_newest_first.extend(version.uid_aliases)

    if not schemas:
        raise Exception('Cannot build formpack without any schemas')

    # FormPack() expects the versions to be ordered from oldest to newest
    pack = FormPack(
        versions=reversed(schemas), title=asset.name, id_string=asset.uid)

    # Find the AssetVersion UID for each deprecated reversion ID
    _reversion_ids = {
        str(version._reversion_version_id): version.uid
        for version in _versions
        if version._reversion_version_id
    }

    # A submission often contains many version keys, e.g. `__version__`,
    # `_version_`, `_version__001`, `_version__002`, each with a different
    # version id (see https://github.com/kobotoolbox/kpi/issues/1465). To
    # cope, assume that the newest version of this asset whose id appears in
    # the submission is the proper one to use
    def _infer_version_id(submission):
        if not use_all_form_versions:
            submission[INFERRED_VERSION_ID_KEY] = version_ids_newest_first[0]
            return submission

        # Collect the version ids of the submission, replacing any
        # deprecated reversion IDs with the UIDs of their corresponding
        # AssetVersions
        submission_version_ids = [
            _reversion_ids.get(value, value)
            for key, value in submission.items()
            if FUZZY_VERSION_ID_KEY in key
        ]

        inferred_version_id = next(
            (
                extant_version_id
                for extant_version_id in version_ids_newest_first
                if extant_version_id in submission_version_ids
            ),
            None,
        )
        if not inferred_version_id:
            # Fall back on the latest version
            # TODO: log a warning?
            inferred_version_id = version_ids_newest_first[0]
        submission[INFERRED_VERSION_ID_KEY] = inferred_version_id
        return submission

    if submission_stream is None:
        _userform_id = asset.deployment.mongo_userform_id
        if not _userform_id.startswith(asset.owner.username):
            raise Exception('asset has unexpected `mongo_userform_id`')

        submission_stream = asset.deployment.get_submissions(
            requesting_user_id=asset.owner.id)

    annotated_stream = (
        _infer_version_id(submission) for submission in submission_stream
    )
    return pack, annotated_stream