Esempio n. 1
0
    def run(self):
        '''
        Starts the import/export job by calling the subclass' `_run_task()`
        method. Catches all exceptions raised by `_run_task()`!  Suitable to
        be called by an asynchronous task runner (Celery)

        Returns immediately when the stored status is already `COMPLETE`.
        Raises `Exception` when the stored status is anything other than
        `CREATED` (possibly because a concurrent task picked up the job).
        '''
        # Decide whether to start from the status stored in the database, not
        # from the in-memory copy held by this instance
        with transaction.atomic():
            _refetched_self = self._meta.model.objects.get(pk=self.pk)
            self.status = _refetched_self.status
            del _refetched_self
            if self.status == self.COMPLETE:
                return
            elif self.status != self.CREATED:
                # possibly a concurrent task?
                raise Exception(
                    'only recently created {}s can be executed'.format(
                        self._meta.model_name))
            self.status = self.PROCESSING
            self.save(update_fields=['status'])

        msgs = defaultdict(list)
        try:
            # This method must be implemented by a subclass
            self._run_task(msgs)
            self.status = self.COMPLETE
        except Exception as err:
            # `except ... as` is valid on Python 2.6+ and 3; `str(err)` is
            # used because exceptions have no `message` attribute on Python 3
            msgs['error_type'] = type(err).__name__
            msgs['error'] = str(err)
            self.status = self.ERROR
            logging.error('Failed to run %s: %s' %
                          (self._meta.model_name, repr(err)),
                          exc_info=True)
        # NOTE(review): the final status and `msgs` are only held in memory at
        # this point; nothing in this method saves them - confirm the caller
        # persists them
    def send(self):
        """
        Posts `self._data` to the hook endpoint and records the outcome with
        `self.save_log()`.

        :return: bool. `True` only when the POST completed and
            `raise_for_status()` did not raise
        """
        if not self._data:
            self.save_log(KOBO_INTERNAL_ERROR_STATUS_CODE, "No data available")
            return False

        # Bound before `requests.post()` runs: when the call itself raises,
        # the `RequestException` handler still needs something to test
        reply = None
        try:
            post_kwargs = self._prepare_request_kwargs()
            headers = post_kwargs.get("headers")

            # Custom headers configured on the hook
            headers.update(self._hook.settings.get("custom_headers", {}))

            # User agent, mentioning the public domain name when it is set
            domain_name = os.getenv("PUBLIC_DOMAIN_NAME")
            if domain_name:
                public_domain = "- {} ".format(domain_name)
            else:
                public_domain = ""
            user_agent = "KoBoToolbox external service {}#{}".format(
                public_domain, self._hook.uid)
            headers.update({"User-Agent": user_agent})

            # Provide username & password when the hook asks for basic
            # authentication
            if self._hook.auth_level == Hook.BASIC_AUTH:
                credentials = (self._hook.settings.get("username"),
                               self._hook.settings.get("password"))
                post_kwargs.update({"auth": credentials})

            reply = requests.post(self._hook.endpoint,
                                  timeout=30,
                                  **post_kwargs)
            reply.raise_for_status()
            self.save_log(reply.status_code, reply.text, True)
            return True
        except requests.exceptions.RequestException as err:
            # `reply` is still None when the exception was raised before
            # `requests.post()` could return anything
            if reply is None:
                text = str(err)
                status_code = KOBO_INTERNAL_ERROR_STATUS_CODE
            else:
                text = reply.text
                status_code = reply.status_code
            self.save_log(status_code, text)
        except Exception as err:
            logging.error(
                "service_json.ServiceDefinition.send - Hook #{} - Data #{} - {}"
                .format(self._hook.uid, self._instance_id, str(err)),
                exc_info=True)
            self.save_log(
                KOBO_INTERNAL_ERROR_STATUS_CODE,
                "An error occurred when sending data to external endpoint")

        return False
 def get_enketo_survey_links(self):
     """
     Ask Enketo for the survey links of this deployment.

     Returns the decoded JSON reply without its `enketo_id`, `code` and
     `preview_iframe_url` entries, or an empty dict when the request fails
     or the reply is not valid JSON.
     """
     payload = {
         'server_url': u'{}/{}'.format(
             settings.KOBOCAT_URL.rstrip('/'), self.asset.owner.username),
         'form_id': self.backend_response['id_string'],
     }
     try:
         endpoint = u'{}{}'.format(settings.ENKETO_SERVER,
                                   settings.ENKETO_SURVEY_ENDPOINT)
         # bare tuple implies basic auth
         response = requests.post(
             endpoint, auth=(settings.ENKETO_API_TOKEN, ''), data=payload)
         response.raise_for_status()
     except requests.exceptions.RequestException:
         # Don't 500 the entire asset view if Enketo is unreachable
         logging.error('Failed to retrieve links from Enketo',
                       exc_info=True)
         return {}

     try:
         links = response.json()
     except ValueError:
         logging.error('Received invalid JSON from Enketo', exc_info=True)
         return {}

     # Drop the entries that must not be returned; absent ones are ignored
     for unwanted in ('enketo_id', 'code', 'preview_iframe_url'):
         links.pop(unwanted, None)
     return links
Esempio n. 4
0
    def generate_xml_from_source(self,
                                 source,
                                 include_note=False,
                                 root_node_name='snapshot_xml',
                                 form_title=None,
                                 id_string=None):
        """
        Build the XML of a form from `source` and report how it went.

        `form_title` and `id_string` fall back to 'Snapshot XML' and
        'snapshot_xml' when they are `None`. When `include_note` is truthy
        and `source` has a 'survey', a note row labelled with `include_note`
        is appended to `source['survey']` itself (the caller's object is
        modified); the remaining processing happens on a deep copy.

        Returns a `(xml, details)` tuple. On failure `xml` is an empty string
        and `details` carries 'status': 'failure' with the error type and
        message; on success `details` carries 'status': 'success'. Both
        cases include the collected 'warnings'.
        """
        form_title = 'Snapshot XML' if form_title is None else form_title
        id_string = 'snapshot_xml' if id_string is None else id_string

        if include_note and 'survey' in source:
            translations = source.get('translations', [])
            if len(translations) > 0:
                # One label per translation
                note_label = [include_note for _ in translations]
            else:
                note_label = include_note
            note_row = {
                'type': 'note',
                'name': 'prepended_note',
                'label': note_label
            }
            source['survey'].append(note_row)

        working_copy = copy.deepcopy(source)
        self._expand_kobo_qs(working_copy)
        self._populate_fields_with_autofields(working_copy)
        self._strip_kuids(working_copy)

        warnings = []
        try:
            pack = FormPack({'content': working_copy},
                            root_node_name=root_node_name,
                            id_string=id_string,
                            title=form_title)
            xml = pack[0].to_xml(warnings=warnings)
        except Exception as err:
            err_message = str(err)
            log_context = {
                'src': source,
                'id_string': id_string,
                'uid': self.uid,
                '_msg': err_message,
                'warnings': warnings,
            }
            logging.error('Failed to generate xform for asset',
                          extra=log_context)
            xml = ''
            details = {
                'status': 'failure',
                'error_type': type(err).__name__,
                'error': err_message,
                'warnings': warnings,
            }
        else:
            details = {
                'status': 'success',
                'warnings': warnings,
            }
        return xml, details
 def _get_data(self):
     """
     Fetch the submission from the deployment backend of the hook's asset
     and return it parsed by `self._parse_data()`.

     Any exception is logged and `None` is returned instead.
     """
     try:
         deployment = self._hook.asset.deployment
         raw_submission = deployment.get_submission(
             self._instance_id, self._hook.export_type)
         parsed = self._parse_data(raw_submission, self._hook.subset_fields)
     except Exception as err:
         message = "service_json.ServiceDefinition._get_data - Hook #{} - Data #{} - {}".format(
             self._hook.uid, self._instance_id, str(err))
         logging.error(message, exc_info=True)
         return None
     return parsed
Esempio n. 6
0
    def run(self):
        """
        Starts the import/export job by calling the subclass' `_run_task()`
        method. Catches all exceptions raised by `_run_task()`!  Suitable to
        be called by an asynchronous task runner (Celery)

        Returns immediately when the stored status is already `COMPLETE`.
        Raises `Exception` when the stored status is anything other than
        `CREATED` (possibly because a concurrent task picked up the job).
        Otherwise the outcome is stored in `status`, `messages` and `data`.
        """
        # Decide whether to start from the status stored in the database, not
        # from the in-memory copy held by this instance
        with transaction.atomic():
            _refetched_self = self._meta.model.objects.get(pk=self.pk)
            self.status = _refetched_self.status
            del _refetched_self
            if self.status == self.COMPLETE:
                return
            elif self.status != self.CREATED:
                # possibly a concurrent task?
                raise Exception(
                    'only recently created {}s can be executed'.format(
                        self._meta.model_name))
            self.status = self.PROCESSING
            self.save(update_fields=['status'])

        msgs = defaultdict(list)
        try:
            # This method must be implemented by a subclass
            self._run_task(msgs)
            self.status = self.COMPLETE
        except ExportTaskBase.InaccessibleData as e:
            msgs['error_type'] = t('Cannot access data')
            # Must be written to `msgs`: a misspelled name here would raise
            # `NameError` from inside the handler and leave the task stuck
            # in `PROCESSING`
            msgs['error'] = str(e)
            self.status = self.ERROR
        # TODO: continue to make more specific exceptions as above until this
        # catch-all can be removed entirely
        except Exception as err:
            msgs['error_type'] = type(err).__name__
            msgs['error'] = str(err)
            self.status = self.ERROR
            logging.error('Failed to run %s: %s' %
                          (self._meta.model_name, repr(err)),
                          exc_info=True)

        self.messages.update(msgs)
        # Record the processing time for diagnostic purposes
        self.data['processing_time_seconds'] = (
            datetime.datetime.now(self.date_created.tzinfo) -
            self.date_created).total_seconds()
        try:
            self.save(update_fields=['status', 'messages', 'data'])
        except TypeError as e:
            # Saving `messages`/`data` failed; at least record the error
            # status on its own
            self.status = self.ERROR
            logging.error('Failed to save %s: %s' %
                          (self._meta.model_name, repr(e)),
                          exc_info=True)
            self.save(update_fields=['status'])
Esempio n. 7
0
    def retry(self):
        """
        Sends the data to the external service once more.

        Builds the service definition of `self.hook` for `self.instance_id`,
        calls its `send()` and reloads this object from the database. If
        anything raises, the error is logged and the status is changed to
        `HOOK_LOG_FAILED`.

        :return: boolean. `False` when an exception was raised, else `True`
        """
        try:
            definition_class = self.hook.get_service_definition()
            definition_class(self.hook, self.instance_id).send()
            self.refresh_from_db()
        except Exception as err:
            logging.error("HookLog.retry - {}".format(str(err)), exc_info=True)
            self.change_status(HOOK_LOG_FAILED)
            return False
        else:
            return True
 def _get_data(self):
     """
     Fetch the submission from the deployment backend of the hook's asset,
     on behalf of the asset owner, and return it parsed by
     `self._parse_data()`.

     Any exception is logged and `None` is returned instead.
     """
     try:
         asset = self._hook.asset
         raw_submission = asset.deployment.get_submission(
             self._submission_id,
             user=asset.owner,
             format_type=self._hook.export_type,
         )
         parsed = self._parse_data(raw_submission, self._hook.subset_fields)
     except Exception as err:
         message = (
             'service_json.ServiceDefinition._get_data: '
             f'Hook #{self._hook.uid} - Data #{self._submission_id} - '
             f'{str(err)}'
         )
         logging.error(message, exc_info=True)
         return None
     return parsed
Esempio n. 9
0
def __get_realm(request):
    """
    Return the realm name to use for `request`.

    The request's subdomain is the default. The allowed-connections API at
    `settings.OC_BUILD_URL` is queried for that subdomain and, when it
    answers with a usable JSON body, the first item of that body is returned
    instead. Any failure to reach the API or to read its reply is logged and
    the subdomain is returned.
    """
    subdomain = get_subdomain(request)
    realm_name = subdomain

    allowed_connections_url = '{}/customer-service/api/allowed-connections'.format(settings.OC_BUILD_URL)
    allowed_connections_response = None
    try:
        allowed_connections_response = requests.get(
            allowed_connections_url,
            params={'subdomain': subdomain},
            # Without a timeout an unresponsive server would block the
            # request indefinitely
            timeout=30,
        )
    except Exception as e:
        kpi_logging.error("oc_views {}".format(str(e)), exc_info=True)

    if isinstance(allowed_connections_response, Response):
        try:
            realm_name = allowed_connections_response.json()[0]
        except (ValueError, LookupError, TypeError) as e:
            # Body is not JSON, is empty, or is not indexable by position:
            # keep the subdomain as the realm name
            kpi_logging.error("oc_views {}".format(str(e)), exc_info=True)

    return realm_name
    def save_log(self, status_code: int, message: str, success: bool = False):
        """
        Updates the `HookLog` of this hook and submission, or creates it when
        none exists yet, with:
        - `status_code` as the HTTP status code of the remote server response
        - `message` as the content of the remote server response; tags are
          stripped from it unless it is valid JSON

        The status becomes `HOOK_LOG_SUCCESS` when `success` is true, or
        `HOOK_LOG_FAILED` once the number of tries has reached
        `HOOK_MAX_RETRIES`; otherwise it is left as it is. A failure to save
        is logged, not raised.
        """
        # The log `uid` is unknown in this context, so the entry is looked up
        # through its `hook` FK and its `submission_id`
        lookup = {
            'hook': self._hook,
            'submission_id': self._submission_id
        }
        try:
            log_entry = HookLog.objects.get(**lookup)
        except HookLog.DoesNotExist:
            log_entry = HookLog(**lookup)

        if not success:
            if log_entry.tries >= constance.config.HOOK_MAX_RETRIES:
                log_entry.status = HOOK_LOG_FAILED
        else:
            log_entry.status = HOOK_LOG_SUCCESS

        log_entry.status_code = status_code

        # A message that cannot be decoded as JSON should be HTML (or
        # plaintext): remove its tags
        try:
            json.loads(message)
        except ValueError:
            without_tags = re.sub(r"<[^>]*>", " ", message)
            message = without_tags.strip()

        log_entry.message = message

        try:
            log_entry.save()
        except Exception as err:
            logging.error(
                f'ServiceDefinitionInterface.save_log - {str(err)}',
                exc_info=True,
            )
Esempio n. 11
0
    def save_log(self, status_code, message, success=False):
        """
        Updates the `HookLog` of this hook and instance, or creates it when
        none exists yet. A failure to save is logged, not raised.

        :param status_code: int. HTTP status code
        :param message: str. Tags are stripped from it unless it is valid
            JSON
        :param success: bool. When true the status becomes
            `HOOK_LOG_SUCCESS`; otherwise it becomes `HOOK_LOG_FAILED` once
            the number of tries has reached `HOOK_MAX_RETRIES`
        """
        # The log `uid` is unknown in this context, so the entry is looked up
        # through its `hook` FK and its `instance_id`
        lookup = {
            "hook": self._hook,
            "instance_id": self._instance_id
        }
        try:
            log_entry = HookLog.objects.get(**lookup)
        except HookLog.DoesNotExist:
            log_entry = HookLog(**lookup)

        if not success:
            if log_entry.tries >= constance.config.HOOK_MAX_RETRIES:
                log_entry.status = HOOK_LOG_FAILED
        else:
            log_entry.status = HOOK_LOG_SUCCESS

        log_entry.status_code = status_code

        # A message that cannot be decoded as JSON should be HTML (or
        # plaintext): remove its tags
        try:
            json.loads(message)
        except ValueError:
            without_tags = re.sub(r"<[^>]*>", " ", message)
            message = without_tags.strip()

        log_entry.message = message

        try:
            log_entry.save()
        except Exception as err:
            logging.error("ServiceDefinitionInterface.save_log - {}".format(str(err)), exc_info=True)
Esempio n. 12
0
    def get_mp3_content(self) -> bytes:
        """
        Run ffmpeg on the file at `self.absolute_path` and return what it
        wrote to standard output, converted to
        `self.CONVERSION_AUDIO_FORMAT`.

        Raises `NotImplementedError` when the object has no `mimetype` or
        `absolute_path`, `NotSupportedFormatException` when the mimetype is
        neither audio nor video, and `FFMpegException` when ffmpeg exits
        with a non-zero return code.
        """

        for required_attr in ('mimetype', 'absolute_path'):
            if not hasattr(self, required_attr):
                raise NotImplementedError(
                    'Parent class does not implement `mimetype` or `absolute_path')

        if not self.mimetype.startswith(('audio', 'video')):
            raise NotSupportedFormatException

        # `pipe:1` makes ffmpeg write the converted content to stdout
        completed = subprocess.run(
            [
                '/usr/bin/ffmpeg',
                '-i',
                self.absolute_path,
                '-f',
                self.CONVERSION_AUDIO_FORMAT,
                'pipe:1',
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        if completed.returncode != 0:
            logging.error(f'ffmpeg error: {completed.stderr}')
            raise FFMpegException

        return completed.stdout
Esempio n. 13
0
    def get_submissions(self,
                        format_type=INSTANCE_FORMAT_TYPE_JSON,
                        instances_ids=None):
        """
        Retrieves submissions through Postgres or Mongo depending on `format_type`.
        It can be filtered on instances uuids.
        `uuid` is used instead of `id` because `id` is not available in ReadOnlyInstance model

        Any exception raised while retrieving is logged and an empty list is
        returned.

        :param format_type: str. INSTANCE_FORMAT_TYPE_JSON|INSTANCE_FORMAT_TYPE_XML
        :param instances_ids: list. Optional; `None` means an empty list
        :return: list: mixed
        """
        # A fresh list per call: a list literal used as the default value
        # would be one object shared by every call
        if instances_ids is None:
            instances_ids = []

        submissions = []
        # The getter is a name-mangled private method, selected by
        # `format_type`.
        # NOTE(review): the mangled name is built from the runtime class
        # name - confirm this method is never called on a subclass
        getter = getattr(
            self, "_{}__get_submissions_in_{}".format(self.__class__.__name__,
                                                      format_type))
        try:
            submissions = getter(instances_ids)
        except Exception as e:
            logging.error(
                "KobocatDeploymentBackend.get_submissions  - {}".format(
                    str(e)),
                exc_info=True)

        return submissions
def _import_user_assets(from_user, to_user):
    """
    Create an `Asset` owned by `to_user` for each of that user's survey
    drafts that has not been migrated yet.

    Drafts without an `asset_type` become 'survey' assets; the others become
    'block' assets placed in the user's "question library" collection, which
    is created when missing. A draft whose import fails is logged and
    skipped. `from_user` is not used by this function.
    """
    user = to_user

    # now, if a user wants to re-import, they can delete the asset from kpi
    # and re-run management command
    already_migrated_sds = user.survey_drafts.exclude(kpi_asset_uid='')
    for migrated_sd in already_migrated_sds.all():
        _kpi_uid = migrated_sd.kpi_asset_uid
        if Asset.objects.filter(uid=_kpi_uid).count() == 0:
            migrated_sd.kpi_asset_uid = ''
            migrated_sd.save()

    not_already_migrated = user.survey_drafts.filter(kpi_asset_uid='')
    user_survey_drafts = not_already_migrated.filter(asset_type=None)
    user_qlib_assets = not_already_migrated.exclude(asset_type=None)

    def _import_asset(asset, parent_collection=None, asset_type='survey'):
        # Create and save a new `Asset` from the draft `asset`, keeping the
        # draft's creation and modification dates
        survey_dict = _csv_to_dict(asset.body)
        obj = {
            'name': asset.name,
            'date_created': asset.date_created,
            'date_modified': asset.date_modified,
            'asset_type': asset_type,
            'owner': user,
        }

        if parent_collection is not None:
            obj['parent'] = parent_collection
            del obj['name']
        new_asset = Asset(**obj)

        _set_auto_field_update(Asset, "date_created", False)
        _set_auto_field_update(Asset, "date_modified", False)
        try:
            new_asset.content = survey_dict
            new_asset.date_created = obj['date_created']
            new_asset.date_modified = obj['date_modified']
            new_asset.save()
        finally:
            # Re-enable automatic dates even when saving fails, so one bad
            # draft cannot leave them disabled for everything that follows
            _set_auto_field_update(Asset, "date_created", True)
            _set_auto_field_update(Asset, "date_modified", True)

        # Note on the old draft the uid of the new asset
        asset.kpi_asset_uid = new_asset.uid
        asset.save()

        return new_asset

    for survey_draft in user_survey_drafts.all():
        try:
            _import_asset(survey_draft, asset_type='survey')
        except Exception:
            # `KeyboardInterrupt` is not an `Exception` and still propagates
            message = (u'Failed to migrate survey draft with name="{}" '
                       u'and pk={}').format(survey_draft.name, survey_draft.pk)
            logging.error(message, exc_info=True)

    (qlib, _) = Collection.objects.get_or_create(name="question library",
                                                 owner=user)

    for qlib_asset in user_qlib_assets.all():
        try:
            _import_asset(qlib_asset, qlib, asset_type='block')
        except Exception:
            # Same handling as for survey drafts: interrupting the command
            # must not be swallowed as a failed migration
            message = (u'Failed to migrate library asset with name="{}" '
                       u'and pk={}').format(qlib_asset.name, qlib_asset.pk)
            logging.error(message, exc_info=True)

    # NOTE(review): `qlib` is a `Collection`, yet automatic dates are
    # switched off on `Asset` here - confirm which model was intended
    _set_auto_field_update(Asset, "date_created", False)
    _set_auto_field_update(Asset, "date_modified", False)
    try:
        qlib.date_created = user.date_joined
        qlib.date_modified = user.date_joined
        qlib.save()
    finally:
        _set_auto_field_update(Asset, "date_created", True)
        _set_auto_field_update(Asset, "date_modified", True)
Esempio n. 15
0
class ImportExportTask(models.Model):
    '''
    A common base model for asynchronous import and exports. Must be
    subclassed to be useful. Subclasses must implement the `_run_task()` method
    '''
    class Meta:
        abstract = True

    # Values of the `status` field
    CREATED = 'created'
    PROCESSING = 'processing'
    COMPLETE = 'complete'
    ERROR = 'error'

    STATUS_CHOICES = (
        (CREATED, CREATED),
        (PROCESSING, PROCESSING),
        (ERROR, ERROR),
        (COMPLETE, COMPLETE),
    )

    user = models.ForeignKey('auth.User')
    data = JSONField()
    # A callable default gives every instance its own dict. A `{}` literal
    # would be a single object shared by all instances, and `run()` mutates
    # it through `self.messages.update()`
    messages = JSONField(default=dict)
    status = models.CharField(choices=STATUS_CHOICES,
                              max_length=32,
                              default=CREATED)
    date_created = models.DateTimeField(auto_now_add=True)

    # date_expired = models.DateTimeField(null=True)

    def run(self):
        '''
        Starts the import/export job by calling the subclass' `_run_task()`
        method. Catches all exceptions raised by `_run_task()`!  Suitable to
        be called by an asynchronous task runner (Celery)

        Returns immediately when the stored status is already `COMPLETE`.
        Raises `Exception` when the stored status is anything other than
        `CREATED` (possibly because a concurrent task picked up the job).
        Otherwise the outcome is stored in `status`, `messages` and `data`.
        '''
        # Decide whether to start from the status stored in the database, not
        # from the in-memory copy held by this instance
        with transaction.atomic():
            _refetched_self = self._meta.model.objects.get(pk=self.pk)
            self.status = _refetched_self.status
            del _refetched_self
            if self.status == self.COMPLETE:
                return
            elif self.status != self.CREATED:
                # possibly a concurrent task?
                raise Exception(
                    'only recently created {}s can be executed'.format(
                        self._meta.model_name))
            self.status = self.PROCESSING
            self.save(update_fields=['status'])

        msgs = defaultdict(list)
        try:
            # This method must be implemented by a subclass
            self._run_task(msgs)
            self.status = self.COMPLETE
        except Exception as err:
            # `except ... as` is valid on Python 2.6+ and 3; `str(err)` is
            # used because exceptions have no `message` attribute on Python 3
            msgs['error_type'] = type(err).__name__
            msgs['error'] = str(err)
            self.status = self.ERROR
            logging.error('Failed to run %s: %s' %
                          (self._meta.model_name, repr(err)),
                          exc_info=True)

        self.messages.update(msgs)
        # Record the processing time for diagnostic purposes
        self.data['processing_time_seconds'] = (
            datetime.datetime.now(self.date_created.tzinfo) -
            self.date_created).total_seconds()
        try:
            self.save(update_fields=['status', 'messages', 'data'])
        except TypeError as e:
            # Saving `messages`/`data` failed; at least record the error
            # status on its own
            self.status = self.ERROR
            logging.error('Failed to save %s: %s' %
                          (self._meta.model_name, repr(e)),
                          exc_info=True)
            self.save(update_fields=['status'])
Esempio n. 16
0
def failures_reports():
    """
    Notifies owners' assets by email of hooks failures.

    Failed hook logs (modified since the last run of the periodic task, when
    that is known) are grouped by asset owner, then by asset, and one email
    with a plain text and an HTML body is built per owner.

    :return: bool. `False` only when sending the emails raised; `True`
        otherwise, including when no enabled periodic task is found
    """
    beat_schedule = settings.CELERY_BEAT_SCHEDULE.get(
        "send-hooks-failures-reports")
    # Use `.first()` instead of `.get()`, because task can be duplicated in admin section

    failures_reports_period_task = PeriodicTask.objects.filter(
        enabled=True,
        task=beat_schedule.get('task')).order_by("-last_run_at").first()

    if not failures_reports_period_task:
        return True

    last_run_at = failures_reports_period_task.last_run_at
    queryset = HookLog.objects.filter(hook__email_notification=True,
                                      status=HOOK_LOG_FAILED)
    if last_run_at:
        queryset = queryset.filter(date_modified__gte=last_run_at)

    queryset = queryset.order_by('hook__asset__name', 'hook__uid',
                                 '-date_modified')

    # PeriodicTask are updated every 3 minutes (default).
    # It means, if this task interval is less than 3 minutes, some data can be duplicated in emails.
    # Setting `beat-sync-every` to 1, makes PeriodicTask to be updated before running the task.
    # So, we need to update it manually.
    # see: http://docs.celeryproject.org/en/latest/userguide/configuration.html#beat-sync-every
    PeriodicTask.objects.filter(task=beat_schedule.get("task")). \
        update(last_run_at=timezone.now())

    records = {}

    # Prepare data for templates.
    # All logs will be grouped under their respective asset and user.
    for record in queryset:
        hook = record.hook
        asset = hook.asset
        owner = asset.owner

        # if users don't exist in dict, add them
        if owner.id not in records:
            records[owner.id] = {
                'username': owner.username,
                # language is not implemented yet.
                # TODO add language to user table in registration process
                'language': getattr(owner, 'language', 'en'),
                'email': owner.email,
                'assets': {}
            }
        owner_assets = records[owner.id]['assets']

        # if asset doesn't exist in user's asset dict, add it
        if asset.uid not in owner_assets:
            owner_assets[asset.uid] = {
                'name': asset.name,
                'max_length': 0,
                'logs': []
            }
        asset_entry = owner_assets[asset.uid]

        # Add log to corresponding asset and user
        asset_entry['logs'].append({
            'hook_name': hook.name,
            'uid': record.uid,
            'date_modified': record.date_modified,
            'status_code': record.status_code,
            'message': record.message
        })

        # Max Length is used for plain text template. To display fixed size columns.
        # It is tracked on the asset entry itself, so the value of one asset
        # cannot leak into another when their records are interleaved.
        asset_entry['max_length'] = max(asset_entry['max_length'],
                                        len(hook.name))

    # Get templates
    plain_text_template = get_template('reports/failures_email_body.txt')
    html_template = get_template('reports/failures_email_body.html')
    email_messages = []

    for owner_record in records.values():
        variables = {
            'username': owner_record.get('username'),
            'assets': owner_record.get('assets'),
            'kpi_base_url': settings.KPI_URL
        }
        # Localize templates. `override()` restores the previously active
        # language on exit, so the owner's language does not stay active for
        # whatever runs next in this thread
        with translation.override(owner_record.get("language")):
            text_content = plain_text_template.render(variables)
            html_content = html_template.render(variables)

            msg = EmailMultiAlternatives(
                translation.ugettext('REST Services Failure Report'),
                text_content, constance.config.SUPPORT_EMAIL,
                [owner_record.get('email')])
        msg.attach_alternative(html_content, 'text/html')
        email_messages.append(msg)

    # Send email messages
    if email_messages:
        try:
            with get_connection() as connection:
                connection.send_messages(email_messages)
        except Exception as e:
            logging.error('failures_reports - {}'.format(str(e)),
                          exc_info=True)
            return False

    return True
    def send(self):
        """
        Posts `self._data` to the hook endpoint, after validating the
        endpoint with `SSRFProtect`, and records the outcome with
        `self.save_log()`.

        :return: bool. `True` only when the POST completed and
            `raise_for_status()` did not raise
        """
        if not self._data:
            self.save_log(
                KOBO_INTERNAL_ERROR_STATUS_CODE,
                "No data available")
            return False

        # Bound before `requests.post()` runs: when the call itself raises,
        # the `RequestException` handler still needs something to test
        reply = None
        try:
            post_kwargs = self._prepare_request_kwargs()
            headers = post_kwargs.get("headers")

            # Custom headers configured on the hook
            headers.update(self._hook.settings.get("custom_headers", {}))

            # User agent, mentioning the public domain name when it is set
            domain_name = os.getenv("PUBLIC_DOMAIN_NAME")
            if domain_name:
                public_domain = "- {} ".format(domain_name)
            else:
                public_domain = ""
            user_agent = "KoboToolbox external service {}#{}".format(
                public_domain, self._hook.uid)
            headers.update({"User-Agent": user_agent})

            # Provide username and password when the hook asks for basic
            # authentication
            if self._hook.auth_level == Hook.BASIC_AUTH:
                credentials = (self._hook.settings.get("username"),
                               self._hook.settings.get("password"))
                post_kwargs.update({"auth": credentials})

            # Each configured list is only passed along when it is not blank
            ssrf_protect_options = {}
            allowed_ips = constance.config.SSRF_ALLOWED_IP_ADDRESS.strip()
            if allowed_ips:
                ssrf_protect_options['allowed_ip_addresses'] = \
                    allowed_ips.split('\r\n')

            denied_ips = constance.config.SSRF_DENIED_IP_ADDRESS.strip()
            if denied_ips:
                ssrf_protect_options['denied_ip_addresses'] = \
                    denied_ips.split('\r\n')

            SSRFProtect.validate(self._hook.endpoint,
                                 options=ssrf_protect_options)

            reply = requests.post(self._hook.endpoint, timeout=30,
                                  **post_kwargs)
            reply.raise_for_status()
            self.save_log(reply.status_code, reply.text, True)
            return True
        except requests.exceptions.RequestException as err:
            # `reply` is still None when the exception was raised before
            # `requests.post()` could return anything
            if reply is None:
                text = str(err)
                status_code = KOBO_INTERNAL_ERROR_STATUS_CODE
            else:
                text = reply.text
                status_code = reply.status_code
            self.save_log(status_code, text)
        except SSRFProtectException as err:
            logging.error(
                f'service_json.ServiceDefinition.send: Hook #{self._hook.uid}'
                f' - Data #{self._submission_id} - {str(err)}',
                exc_info=True)
            self.save_log(
                KOBO_INTERNAL_ERROR_STATUS_CODE,
                f'{self._hook.endpoint} is not allowed')
        except Exception as err:
            logging.error(
                f'service_json.ServiceDefinition.send: Hook #{self._hook.uid}'
                f' - Data #{self._submission_id} - {str(err)}',
                exc_info=True)
            self.save_log(
                KOBO_INTERNAL_ERROR_STATUS_CODE,
                "An error occurred when sending data to external endpoint")

        return False
Esempio n. 18
0
def build_formpack(asset, submission_stream=None, use_all_form_versions=True):
    """
    Return a tuple containing a `FormPack` instance and the iterable stream of
    submissions for the given `asset`. If `use_all_form_versions` is `False`,
    then only the newest version of the form is considered, and all submissions
    are assumed to have been collected with that version of the form.

    Every submission of the returned stream gets an `__inferred_version__`
    key naming the version it is attributed to. When `submission_stream` is
    `None`, the submissions are read from the asset's deployment.

    Raises `Exception` when the asset has no deployment, when no version
    yields a schema, or when the deployment's `mongo_userform_id` does not
    start with the owner's username.
    """
    FUZZY_VERSION_ID_KEY = '_version_'
    INFERRED_VERSION_ID_KEY = '__inferred_version__'

    if not asset.has_deployment:
        raise Exception('Cannot build formpack for asset without deployment')

    _versions = (
        asset.deployed_versions
        if use_all_form_versions
        else [asset.deployed_versions.first()]
    )

    schemas = []
    version_ids_newest_first = []
    for version in _versions:
        try:
            fp_schema = version.to_formpack_schema()
        # FIXME: should FormPack validation errors have their own
        # exception class?
        except TypeError as err:
            # https://github.com/kobotoolbox/kpi/issues/1361
            logging.error('Failed to get formpack schema for version: %s' %
                          repr(err),
                          exc_info=True)
            continue

        fp_schema['version_id_key'] = INFERRED_VERSION_ID_KEY
        schemas.append(fp_schema)
        version_ids_newest_first.append(version.uid)
        if version.uid_aliases:
            version_ids_newest_first.extend(version.uid_aliases)

    if not schemas:
        raise Exception('Cannot build formpack without any schemas')

    # FormPack() expects the versions to be ordered from oldest to newest
    pack = FormPack(versions=reversed(schemas),
                    title=asset.name,
                    id_string=asset.uid)

    # Find the AssetVersion UID for each deprecated reversion ID
    _reversion_ids = {
        str(version._reversion_version_id): version.uid
        for version in _versions
        if version._reversion_version_id
    }

    # A submission often contains many version keys, e.g. `__version__`,
    # `_version_`, `_version__001`, `_version__002`, each with a different
    # version id (see https://github.com/kobotoolbox/kpi/issues/1465). To cope,
    # assume that the newest version of this asset whose id appears in the
    # submission is the proper one to use
    def _infer_version_id(submission):
        if not use_all_form_versions:
            submission[INFERRED_VERSION_ID_KEY] = version_ids_newest_first[0]
            return submission

        # Collect the version ids found in the submission, replacing any
        # deprecated reversion ID with the UID of its AssetVersion
        submission_version_ids = [
            _reversion_ids.get(val, val)
            for key, val in submission.items()
            if FUZZY_VERSION_ID_KEY in key
        ]
        inferred_version_id = next(
            (known_id for known_id in version_ids_newest_first
             if known_id in submission_version_ids),
            None)
        if not inferred_version_id:
            # Fall back on the latest version
            # TODO: log a warning?
            inferred_version_id = version_ids_newest_first[0]
        submission[INFERRED_VERSION_ID_KEY] = inferred_version_id
        return submission

    if submission_stream is None:
        _userform_id = asset.deployment.mongo_userform_id
        if not _userform_id.startswith(asset.owner.username):
            raise Exception('asset has unexpected `mongo_userform_id`')

        submission_stream = asset.deployment.get_submissions(
            requesting_user_id=asset.owner.id)

    # Lazily tag each submission as the stream is consumed
    submission_stream = (_infer_version_id(submission)
                         for submission in submission_stream)

    return pack, submission_stream