Example #1
def _export_async_export_response(request, xform, export, dataview_pk=None):
    """
    Checks the export status and generates the response
    :param request:
    :param xform:
    :param export:
    :return: response dict example {"job_status": "Success", "export_url": ...}
    """
    if export.status == Export.SUCCESSFUL:
        if export.export_type not in [Export.EXTERNAL_EXPORT,
                                      Export.GOOGLE_SHEETS_EXPORT]:
            export_url = reverse(
                'export-detail',
                kwargs={'pk': export.pk},
                request=request)
        else:
            export_url = export.export_url
        resp = async_status(SUCCESSFUL)
        resp['export_url'] = export_url
    elif export.status == Export.PENDING:
        resp = async_status(PENDING)
    else:
        resp = async_status(FAILED, export.error_message)

    return resp
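The async_status helper and its status constants (PENDING, SUCCESSFUL, FAILED) are imported rather than defined in these examples. Below is a minimal sketch of the contract they appear to follow, inferred from the return-value docstring above and the test in Example #6; the constant values and message strings are assumptions, not the actual onadata implementation.

# Hypothetical sketch of the async_status helper used throughout these
# examples; the real onadata implementation may differ.
PENDING, SUCCESSFUL, FAILED = 0, 1, 2

status_msg = {
    PENDING: 'PENDING',
    SUCCESSFUL: 'Success',
    FAILED: 'Failed',
}


def async_status(status, error=None):
    """Build a response dict with a 'job_status' and an optional 'error'."""
    status_dict = {'job_status': status_msg[status]}
    if error:
        status_dict['error'] = error
    return status_dict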
Example #2
def get_async_response(job_uuid, request, xform, count=0):
    """
    Returns the status of an async task for the given job_uuid.
    """
    def _get_response():
        export = get_object_or_404(Export, task_id=job_uuid)
        return export_async_export_response(request, export)

    try:
        job = AsyncResult(job_uuid)
        if job.state == 'SUCCESS':
            resp = _get_response()
        else:
            resp = async_status(celery_state_to_status(job.state))

            # append task result to the response
            if job.result:
                result = job.result
                if isinstance(result, dict):
                    resp.update(result)
                else:
                    resp.update({'progress': str(result)})
    except (OperationalError, ConnectionError) as e:
        report_exception("Connection Error", e, sys.exc_info())
        if count > 0:
            raise ServiceUnavailable

        return get_async_response(job_uuid, request, xform, count + 1)
    except BacklogLimitExceeded:
        # most likely still processing
        resp = async_status(celery_state_to_status('PENDING'))

    return resp
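A hedged sketch of how a caller might drive get_async_response when polling for an export job; the viewset action, query parameter, and response wrapping below are illustrative assumptions rather than the actual onadata API.

# Illustrative only: polling an export task from a DRF-style viewset action
# (class definition omitted).
from rest_framework import status
from rest_framework.response import Response


def export_async(self, request, *args, **kwargs):
    xform = self.get_object()
    job_uuid = request.query_params.get('job_uuid')
    if not job_uuid:
        return Response(data={'error': 'job_uuid is required'},
                        status=status.HTTP_400_BAD_REQUEST)
    # e.g. {'job_status': 'PENDING'} while running, or
    #      {'job_status': 'Success', 'export_url': '...'} when done
    resp = get_async_response(job_uuid, request, xform)
    return Response(data=resp, status=status.HTTP_202_ACCEPTED)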
Example #3
def get_async_csv_submission_status(job_uuid):
    """ Gets CSV Submision progress or result
    Can be used to pol long running submissions
    :param str job_uuid: The submission job uuid returned by _submit_csv.delay
    :return: Dict with import progress info (insertions & total)
    :rtype: Dict
    """
    if not job_uuid:
        return async_status(FAILED, u'Empty job uuid')

    job = AsyncResult(job_uuid)
    try:
        # result = (job.result or job.state)
        if job.state not in ['SUCCESS', 'FAILURE']:
            response = async_status(celery_state_to_status(job.state))
            response.update(job.info)

            return response

        if job.state == 'FAILURE':
            return async_status(celery_state_to_status(job.state),
                                text(job.result))

    except BacklogLimitExceeded:
        return async_status(celery_state_to_status('PENDING'))

    return job.get()
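Since the docstring says this can be used to poll a long-running submission started with _submit_csv.delay, here is a small hedged sketch of such a polling loop. The wrapper function and sleep interval are assumptions; in practice the check usually happens once per HTTP request rather than in a blocking loop.

# Illustrative polling loop around get_async_csv_submission_status.
import time


def wait_for_csv_import(job_uuid, interval=5):
    """Poll until the CSV import finishes, then return its final status."""
    while True:
        state = get_async_csv_submission_status(job_uuid)
        # In-progress responses carry the task meta, e.g.
        # {'job_status': ..., 'progress': 120, 'total': 500, 'info': [...]}
        if 'additions' in state or state.get('error'):
            return state
        time.sleep(interval)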
Example #4
def get_async_csv_submission_status(job_uuid):
    """ Gets CSV Submision progress or result
    Can be used to pol long running submissions
    :param str job_uuid: The submission job uuid returned by _submit_csv.delay
    :return: Dict with import progress info (insertions & total)
    :rtype: Dict
    """
    if not job_uuid:
        return async_status(FAILED, u'Empty job uuid')

    job = AsyncResult(job_uuid)
    try:
        result = (job.result or job.state)

        if isinstance(result, Exception):
            return async_status(celery_state_to_status(job.state),
                                job.result.message)

        if isinstance(result, (str, unicode)):
            return async_status(celery_state_to_status(job.state))

    except BacklogLimitExceeded:
        return async_status(celery_state_to_status('PENDING'))

    return result
Example #5
def get_async_response(job_uuid, request, xform, count=0):
    try:
        job = AsyncResult(job_uuid)
        if job.state == 'SUCCESS':
            export_id = job.result
            export = get_object_or_404(Export, id=export_id)

            resp = _export_async_export_response(request, xform, export)
        else:
            resp = async_status(celery_state_to_status(job.state))

            # append task result to the response
            if job.result:
                if isinstance(job.result, dict):
                    resp.update(job.result)
                else:
                    resp.update({'progress': str(job.result)})
    except ConnectionError as e:
        if count > 0:
            raise ServiceUnavailable(unicode(e))

        return get_async_response(job_uuid, request, xform, count + 1)
    except BacklogLimitExceeded:
        # most likely still processing
        resp = async_status(celery_state_to_status('PENDING'))

    return resp
Example #6
 def test_async_status(self):
     self.assertEqual(async_status.status_msg[async_status.PENDING],
                      async_status.async_status(async_status.PENDING)
                      .get('job_status'))
     self.assertEqual(async_status.status_msg[async_status.SUCCESSFUL],
                      async_status.async_status(async_status.SUCCESSFUL)
                      .get('job_status'))
     self.assertEqual(async_status.status_msg[async_status.FAILED],
                      async_status.async_status(async_status.FAILED)
                      .get('job_status'))
     self.assertTrue(async_status.
                     async_status(async_status.FAILED, 'has error')
                     .get('error'))
     self.assertFalse(async_status.
                      async_status(async_status.SUCCESSFUL).get('error'))
Example #7
def failed_import(rollback_uuids, xform, exception, status_message):
    """ Report a failed import.
    :param rollback_uuids: The rollback UUIDs
    :param xform: The XForm the data was being imported into
    :param exception: The exception object
    :param status_message: The error message for the returned async status
    :return: The async_status result
    """
    Instance.objects.filter(uuid__in=rollback_uuids, xform=xform).delete()
    report_exception(
        'CSV Import Failed : %d - %s - %s' %
        (xform.pk, xform.id_string, xform.title), exception, sys.exc_info())
    return async_status(FAILED, status_message)
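failed_import is the rollback-and-report path of the CSV import task. A minimal sketch of its call pattern, mirroring how the submit_csv examples below use it; the wrapper function itself is hypothetical.

# Hypothetical wrapper showing the typical failed_import call pattern.
def import_rows(username, xform, csv_reader, rollback_uuids):
    try:
        for row in csv_reader:
            pass  # convert each row to XML and call safe_create_instance()
    except UnicodeDecodeError as e:
        return failed_import(rollback_uuids, xform, e,
                             'CSV file must be utf-8 encoded')
    except Exception as e:
        return failed_import(rollback_uuids, xform, e, str(e))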
Example #8
def get_async_response(job_uuid, request, xform, count=0):
    try:
        job = AsyncResult(job_uuid)
        if job.state == 'SUCCESS':
            export_id = job.result
            export = Export.objects.get(id=export_id)

            resp = _export_async_export_response(request, xform, export)
        else:
            resp = async_status(celery_state_to_status(job.state))
    except ConnectionError as e:
        if count > 0:
            raise ServiceUnavailable(unicode(e))

        return get_async_response(job_uuid, request, xform, count + 1)

    return resp
Example #9
def _generate_new_export(request,
                         xform,
                         query,
                         export_type,
                         dataview_pk=False):
    query = _set_start_end_params(request, query)
    extension = _get_extension_from_export_type(export_type)

    options = {
        "extension": extension,
        "username": xform.user.username,
        "id_string": xform.id_string,
    }
    if query:
        options['query'] = query

    options["dataview_pk"] = dataview_pk
    if export_type == Export.GOOGLE_SHEETS_EXPORT:
        options['google_credentials'] = \
            _get_google_credential(request).to_json()

    try:
        if export_type == Export.EXTERNAL_EXPORT:
            options['token'] = request.GET.get('token')
            options['data_id'] = request.GET.get('data_id')
            options['meta'] = request.GET.get('meta')

            export = generate_external_export(export_type,
                                              xform.user.username,
                                              xform.id_string,
                                              None,
                                              options,
                                              xform=xform)
        elif export_type == Export.OSM_EXPORT:
            export = generate_osm_export(export_type,
                                         xform.user.username,
                                         xform.id_string,
                                         None,
                                         options,
                                         xform=xform)
        elif export_type == Export.ZIP_EXPORT:
            export = generate_attachments_zip_export(export_type,
                                                     xform.user.username,
                                                     xform.id_string,
                                                     None,
                                                     options,
                                                     xform=xform)
        elif export_type == Export.KML_EXPORT:
            export = generate_kml_export(export_type,
                                         xform.user.username,
                                         xform.id_string,
                                         None,
                                         options,
                                         xform=xform)
        else:
            options.update(parse_request_export_options(request.query_params))

            export = generate_export(export_type, xform, None, options)

        audit = {"xform": xform.id_string, "export_type": export_type}
        log.audit_log(
            log.Actions.EXPORT_CREATED, request.user, xform.user,
            _("Created %(export_type)s export on '%(id_string)s'.") % {
                'id_string': xform.id_string,
                'export_type': export_type.upper()
            }, audit, request)
    except NoRecordsFoundError:
        raise Http404(_("No records found to export"))
    except J2XException as e:
        # j2x exception
        return async_status(FAILED, str(e))
    except SPSSIOError as e:
        raise exceptions.ParseError(str(e))
    else:
        return export
Example #10
def submit_csv(username, xform, csv_file, overwrite=False):
    """Imports CSV data to an existing form

    Takes a CSV formatted file or string containing rows of submission/instance
    data, converts them to XML submissions and finally submits them by calling
    :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (str or file) csv_file: A CSV formatted file with submission rows.
    :return: If successful, a dict with an import summary, else an error dict.
    :rtype: Dict
    """
    if isinstance(csv_file, str):
        csv_file = BytesIO(csv_file)
    elif csv_file is None or not hasattr(csv_file, 'read'):
        return async_status(
            FAILED,
            (u'Invalid param type for `csv_file`. '
             'Expected utf-8 encoded file or unicode'
             ' string got {} instead.'.format(type(csv_file).__name__)))

    num_rows = sum(1 for row in csv_file) - 1
    csv_file.seek(0)

    csv_reader = ucsv.DictReader(csv_file, encoding='utf-8-sig')
    csv_header = csv_reader.fieldnames

    # check for spaces in headers
    if any(' ' in header for header in csv_header):
        return async_status(FAILED,
                            u'CSV file fieldnames should not contain spaces')

    # Get the data dictionary
    xform_header = xform.get_headers()

    missing_col = set(xform_header).difference(csv_header)
    addition_col = set(csv_header).difference(xform_header)

    # change to list
    missing_col = list(missing_col)
    addition_col = list(addition_col)
    # remove all metadata columns
    missing = [
        col for col in missing_col
        if not col.startswith("_") and col not in IGNORED_COLUMNS
    ]

    # remove all metadata inside groups
    missing = [col for col in missing if '/_' not in col]

    # ignore if is multiple select question
    for col in csv_header:
        # this col is a multiple select question
        survey_element = xform.get_survey_element(col)
        if survey_element and \
                survey_element.get('type') == MULTIPLE_SELECT_TYPE:
            # remove from the missing and additional list
            missing = [x for x in missing if not x.startswith(col)]

            addition_col.remove(col)

    # remove headers for repeats that might be missing from csv
    missing = sorted([m for m in missing if m.find('[') == -1])

    # Include additional repeats
    addition_col = [a for a in addition_col if a.find('[') == -1]

    if missing:
        return async_status(
            FAILED, u"Sorry uploaded file does not match the form. "
            u"The file is missing the column(s): "
            u"{0}.".format(', '.join(missing)))

    if overwrite:
        xform.instances.filter(deleted_at__isnull=True)\
            .update(deleted_at=timezone.now(),
                    deleted_by=User.objects.get(username=username))

    rollback_uuids = []
    submission_time = datetime.utcnow().isoformat()
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    error = None
    additions = duplicates = inserts = 0
    try:
        for row in csv_reader:
            # remove the additional columns
            for index in addition_col:
                del row[index]

            # fetch submission uuid before purging row metadata
            row_uuid = row.get('meta/instanceID') or row.get('_uuid')
            submitted_by = row.get('_submitted_by')
            submission_date = row.get('_submission_time', submission_time)

            location_data = {}
            for key in list(row):  # seems faster than a comprehension
                # remove metadata (keys starting with '_')
                if key.startswith('_'):
                    del row[key]

                # Collect row location data into separate location_data dict
                if key.endswith(
                    ('.latitude', '.longitude', '.altitude', '.precision')):
                    location_key, location_prop = key.rsplit(u'.', 1)
                    location_data.setdefault(location_key, {}).update(
                        {location_prop: row.get(key, '0')})
                # remove 'n/a' values
                if not key.startswith('_') and row[key] == 'n/a':
                    del row[key]

            # collect all location K-V pairs into single geopoint field(s)
            # in location_data dict
            for location_key in list(location_data):
                location_data.update({
                    location_key: (u'%(latitude)s %(longitude)s '
                                   '%(altitude)s %(precision)s') %
                    defaultdict(lambda: '', location_data.get(location_key))
                })

            row = csv_dict_to_nested_dict(row)
            location_data = csv_dict_to_nested_dict(location_data)

            row = dict_merge(row, location_data)

            # inject our form's uuid into the submission
            row.update(ona_uuid)

            old_meta = row.get('meta', {})
            new_meta, update = get_submission_meta_dict(xform, row_uuid)
            inserts += update
            old_meta.update(new_meta)
            row.update({'meta': old_meta})

            row_uuid = row.get('meta').get('instanceID')
            rollback_uuids.append(row_uuid.replace('uuid:', ''))

            xml_file = BytesIO(
                dict2xmlsubmission(row, xform, row_uuid, submission_date))

            try:
                error, instance = safe_create_instance(username, xml_file, [],
                                                       xform.uuid, None)
            except ValueError as e:
                error = e

            if error:
                if not (isinstance(error, OpenRosaResponse)
                        and error.status_code == 202):
                    Instance.objects.filter(uuid__in=rollback_uuids,
                                            xform=xform).delete()
                    return async_status(FAILED, text(error))
                else:
                    duplicates += 1
            else:
                additions += 1
                if additions % PROGRESS_BATCH_UPDATE == 0:
                    try:
                        current_task.update_state(state='PROGRESS',
                                                  meta={
                                                      'progress': additions,
                                                      'total': num_rows,
                                                      'info': addition_col
                                                  })
                        print(current_task)
                    except Exception:
                        logging.exception(
                            _(u'Could not update state of '
                              'import CSV batch process.'))
                    finally:
                        xform.submission_count(True)

                users = User.objects.filter(
                    username=submitted_by) if submitted_by else []
                if users:
                    instance.user = users[0]
                    instance.save()

    except UnicodeDecodeError as e:
        return failed_import(rollback_uuids, xform, e,
                             u'CSV file must be utf-8 encoded')
    except Exception as e:
        return failed_import(rollback_uuids, xform, e, text(e))
    finally:
        xform.submission_count(True)

    return {
        "additions": additions - inserts,
        "duplicates": duplicates,
        u"updates": inserts,
        u"info": u"Additional column(s) excluded from the upload: '{0}'."
                 .format(', '.join(list(addition_col)))
    }  # yapf: disable
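Per the docstring, submit_csv returns either an import summary or an error dict built by async_status. A hedged sketch of how a caller could tell the two apart, assuming an existing XForm instance xform; the username and file name are made up.

# Illustrative handling of submit_csv's two possible return shapes.
with open('submissions.csv', 'rb') as csv_file:  # hypothetical file
    result = submit_csv('demo_user', xform, csv_file)

if result.get('error'):
    # Failure path: async_status(FAILED, ...) -> {'job_status': ..., 'error': ...}
    print('Import failed:', result['error'])
else:
    # Success path: {'additions': ..., 'duplicates': ..., 'updates': ..., 'info': ...}
    print('Added {additions}, updated {updates}, skipped {duplicates} '
          'duplicates'.format(**result))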
Example #11
def submit_csv(username, xform, csv_file, overwrite=False):
    """Imports CSV data to an existing form

    Takes a CSV formatted file or string containing rows of submission/instance
    data, converts them to XML submissions and finally submits them by calling
    :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (str or file) csv_file: A CSV formatted file with submission rows.
    :return: If successful, a dict with an import summary, else an error dict.
    :rtype: Dict
    """
    csv_file_validation_summary = validate_csv_file(csv_file, xform)

    if csv_file_validation_summary.get('valid'):
        additional_col = csv_file_validation_summary.get('additional_col')
    else:
        return async_status(
            FAILED,
            csv_file_validation_summary.get('error_msg')
        )

    num_rows = sum(1 for row in csv_file) - 1

    # Change stream position to start of file
    csv_file.seek(0)

    csv_reader = ucsv.DictReader(csv_file, encoding='utf-8-sig')
    xform_json = json.loads(xform.json)
    select_multiples = [
        qstn.name for qstn in
        xform.get_survey_elements_of_type(MULTIPLE_SELECT_TYPE)]
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    additions = duplicates = inserts = 0
    rollback_uuids = []
    errors = {}

    # Retrieve the columns we should validate values for
    # Currently validating date, datetime, integer and decimal columns
    col_to_validate = {
        'date': (get_columns_by_type(XLS_DATE_FIELDS, xform_json), parse),
        'datetime': (
            get_columns_by_type(XLS_DATETIME_FIELDS, xform_json), parse),
        'integer': (get_columns_by_type(['integer'], xform_json), int),
        'decimal': (get_columns_by_type(['decimal'], xform_json), float)
    }

    if overwrite:
        instance_ids = [i['id'] for i in xform.instances.values('id')]
        xform.instances.filter(deleted_at__isnull=True)\
            .update(deleted_at=timezone.now(),
                    deleted_by=User.objects.get(username=username))
        # send message
        send_message(
            instance_id=instance_ids, target_id=xform.id,
            target_type=XFORM, user=User.objects.get(username=username),
            message_verb=SUBMISSION_DELETED)

    try:
        for row_no, row in enumerate(csv_reader):
            # Remove additional columns
            for index in additional_col:
                del row[index]

            # Remove 'n/a' and '' values from csv
            row = {k: v for (k, v) in row.items() if v not in [NA_REP, '']}

            row, error = validate_row(row, col_to_validate)

            if error:
                errors[row_no] = error

            # Only continue the process if no errors were encountered while
            # validating the data
            if not errors:
                location_data = {}

                for key in list(row):
                    # Collect row location data into separate location_data
                    # dict
                    if key.endswith(('.latitude', '.longitude', '.altitude',
                                    '.precision')):
                        location_key, location_prop = key.rsplit(u'.', 1)
                        location_data.setdefault(location_key, {}).update({
                            location_prop:
                            row.get(key, '0')
                        })

                # collect all location K-V pairs into single geopoint field(s)
                # in location_data dict
                for location_key in list(location_data):
                    location_data.update({
                        location_key:
                        (u'%(latitude)s %(longitude)s '
                            '%(altitude)s %(precision)s') % defaultdict(
                            lambda: '', location_data.get(location_key))
                    })

                nested_dict = csv_dict_to_nested_dict(
                    row, select_multiples=select_multiples)
                row = flatten_split_select_multiples(
                    nested_dict, select_multiples=select_multiples)
                location_data = csv_dict_to_nested_dict(location_data)
                # Merge location_data into the Row data
                row = dict_merge(row, location_data)

                submission_time = datetime.utcnow().isoformat()
                row_uuid = row.get('meta/instanceID') or (
                    'uuid:{}'.format(row.get(UUID))
                    if row.get(UUID) else None)
                submitted_by = row.get('_submitted_by')
                submission_date = row.get('_submission_time', submission_time)

                for key in list(row):
                    # remove metadata (keys starting with '_')
                    if key.startswith('_'):
                        del row[key]

                # Inject our forms uuid into the submission
                row.update(ona_uuid)

                old_meta = row.get('meta', {})
                new_meta, update = get_submission_meta_dict(xform, row_uuid)
                inserts += update
                old_meta.update(new_meta)
                row.update({'meta': old_meta})

                row_uuid = row.get('meta').get('instanceID')
                rollback_uuids.append(row_uuid.replace('uuid:', ''))

                try:
                    xml_file = BytesIO(
                        dict2xmlsubmission(
                            row, xform, row_uuid, submission_date))

                    try:
                        error, instance = safe_create_instance(
                            username, xml_file, [], xform.uuid, None)
                    except ValueError as e:
                        error = e

                    if error:
                        if not (isinstance(error, OpenRosaResponse)
                                and error.status_code == 202):
                            Instance.objects.filter(
                                uuid__in=rollback_uuids, xform=xform).delete()
                            return async_status(FAILED, text(error))
                        else:
                            duplicates += 1
                    else:
                        additions += 1

                        if additions % PROGRESS_BATCH_UPDATE == 0:
                            try:
                                current_task.update_state(
                                    state='PROGRESS',
                                    meta={
                                        'progress': additions,
                                        'total': num_rows,
                                        'info': additional_col
                                    })
                            except Exception:
                                logging.exception(
                                    _(u'Could not update state of '
                                        'import CSV batch process.'))
                            finally:
                                xform.submission_count(True)

                        users = User.objects.filter(
                            username=submitted_by) if submitted_by else []
                        if users:
                            instance.user = users[0]
                            instance.save()
                except Exception as e:
                    return failed_import(rollback_uuids, xform, e, text(e))
    except UnicodeDecodeError as e:
        return failed_import(rollback_uuids, xform, e,
                             'CSV file must be utf-8 encoded')

    if errors:
        # Rollback all created instances if an error occurred during
        # validation
        Instance.objects.filter(
            uuid__in=rollback_uuids, xform=xform).delete()
        xform.submission_count(True)
        return async_status(
            FAILED,
            u'Invalid CSV data imported in row(s): {}'.format(errors))
    else:
        added_submissions = additions - inserts
        event_by = User.objects.get(username=username)
        event_name = None
        tracking_properties = {
            'xform_id': xform.pk,
            'project_id': xform.project.pk,
            'submitted_by': event_by,
            'label': f'csv-import-for-form-{xform.pk}',
            'from': 'CSV Import',
        }
        if added_submissions > 0:
            tracking_properties['value'] = added_submissions
            event_name = INSTANCE_CREATE_EVENT
            analytics.track(
                event_by, event_name, properties=tracking_properties)

        if inserts > 0:
            tracking_properties['value'] = inserts
            event_name = INSTANCE_UPDATE_EVENT
            analytics.track(
                event_by, event_name, properties=tracking_properties)

        return {
            'additions': added_submissions,
            'duplicates': duplicates,
            'updates': inserts,
            'info': "Additional column(s) excluded from the upload: '{0}'."
            .format(', '.join(list(additional_col)))}
Example #12
def get_async_response(job_uuid, request, xform, count=0):
    try:
        job = AsyncResult(job_uuid)
        if job.state == 'SUCCESS':
            export_id = job.result
            export = get_object_or_404(Export, id=export_id)

            resp = _export_async_export_response(request, xform, export)
        else:
            resp = async_status(celery_state_to_status(job.state))

            # append task result to the response
            if job.result:
                if isinstance(job.result, dict):
                    resp.update(job.result)
                else:
                    resp.update({'progress': str(job.result)})
    except ConnectionError as e:
        if count > 0:
            raise ServiceUnavailable(unicode(e))

        return get_async_response(job_uuid, request, xform, count + 1)
    except BacklogLimitExceeded:
        # most likely still processing
        resp = async_status(celery_state_to_status('PENDING'))

    return resp


def response_for_format(data, format=None):
    if format == 'xml':
        formatted_data = data.xml
    elif format == 'xls':
        if not data.xls:
            raise Http404()

        formatted_data = data.xls
    else:
        formatted_data = json.loads(data.json)
    return Response(formatted_data)
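response_for_format expects an object exposing xml, xls, and json attributes, such as an XForm instance. A brief hedged sketch of how a retrieve action might use it; the viewset method and format negotiation are assumptions.

# Illustrative only: returning a form in the requested format
# (method body of a hypothetical viewset; class definition omitted).
def retrieve(self, request, *args, **kwargs):
    xform = self.get_object()
    export_format = kwargs.get('format') or request.GET.get('format')
    # 'xml' -> raw XML, 'xls' -> stored XLS file, anything else -> parsed JSON
    return response_for_format(xform, format=export_format)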