def _export_async_export_response(request, xform, export, dataview_pk=None):
    """
    Checks the export status and generates the response.

    :param request:
    :param xform:
    :param export:
    :param dataview_pk:
    :return: response dict, for example
        {"job_status": "Success", "export_url": ...}
    """
    if export.status == Export.SUCCESSFUL:
        if export.export_type not in [Export.EXTERNAL_EXPORT,
                                      Export.GOOGLE_SHEETS_EXPORT]:
            export_url = reverse(
                'export-detail', kwargs={'pk': export.pk}, request=request)
        else:
            export_url = export.export_url

        resp = async_status(SUCCESSFUL)
        resp['export_url'] = export_url
    elif export.status == Export.PENDING:
        resp = async_status(PENDING)
    else:
        resp = async_status(FAILED, export.error_message)

    return resp
def get_async_response(job_uuid, request, xform, count=0):
    """
    Returns the status of an async task for the given job_uuid.
    """
    def _get_response():
        export = get_object_or_404(Export, task_id=job_uuid)
        return export_async_export_response(request, export)

    try:
        job = AsyncResult(job_uuid)
        if job.state == 'SUCCESS':
            resp = _get_response()
        else:
            resp = async_status(celery_state_to_status(job.state))

            # append task result to the response
            if job.result:
                result = job.result
                if isinstance(result, dict):
                    resp.update(result)
                else:
                    resp.update({'progress': str(result)})
    except (OperationalError, ConnectionError) as e:
        report_exception("Connection Error", e, sys.exc_info())
        if count > 0:
            raise ServiceUnavailable

        return get_async_response(job_uuid, request, xform, count + 1)
    except BacklogLimitExceeded:
        # most likely still processing
        resp = async_status(celery_state_to_status('PENDING'))

    return resp
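# Illustrative caller for get_async_response: a DRF viewset action that polls
# an export job. The mixin/action names and the `job_uuid` query parameter
# are assumptions for this sketch, not necessarily onadata's actual API.
from rest_framework import status
from rest_framework.decorators import action
from rest_framework.response import Response


class ExportAsyncMixin(object):
    @action(methods=['GET'], detail=True)
    def export_async(self, request, *args, **kwargs):
        """Hypothetical action returning the current async export status."""
        xform = self.get_object()
        job_uuid = request.query_params.get('job_uuid')
        resp = get_async_response(job_uuid, request, xform)

        return Response(data=resp, status=status.HTTP_202_ACCEPTED)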
def get_async_csv_submission_status(job_uuid):
    """Gets CSV Submission progress or result.

    Can be used to poll long running submissions.

    :param str job_uuid: The submission job uuid returned by _submit_csv.delay
    :return: Dict with import progress info (insertions & total)
    :rtype: Dict
    """
    if not job_uuid:
        return async_status(FAILED, u'Empty job uuid')

    job = AsyncResult(job_uuid)
    try:
        if job.state not in ['SUCCESS', 'FAILURE']:
            response = async_status(celery_state_to_status(job.state))
            # job.info holds the progress meta dict while the task runs;
            # it is None for states such as PENDING, so guard the update
            if isinstance(job.info, dict):
                response.update(job.info)

            return response

        if job.state == 'FAILURE':
            return async_status(
                celery_state_to_status(job.state), text(job.result))
    except BacklogLimitExceeded:
        return async_status(celery_state_to_status('PENDING'))

    return job.get()
def get_async_csv_submission_status(job_uuid):
    """Gets CSV Submission progress or result.

    Can be used to poll long running submissions.

    :param str job_uuid: The submission job uuid returned by _submit_csv.delay
    :return: Dict with import progress info (insertions & total)
    :rtype: Dict
    """
    if not job_uuid:
        return async_status(FAILED, u'Empty job uuid')

    job = AsyncResult(job_uuid)
    try:
        result = (job.result or job.state)
        if isinstance(result, Exception):
            return async_status(
                celery_state_to_status(job.state), text(job.result))

        if isinstance(result, str):
            return async_status(celery_state_to_status(job.state))
    except BacklogLimitExceeded:
        return async_status(celery_state_to_status('PENDING'))

    return result
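# A minimal polling sketch around get_async_csv_submission_status. The
# terminal condition keys ('additions', 'job_status') follow the dicts the
# functions above return; the helper name and poll interval are assumptions.
import time


def wait_for_csv_import(job_uuid, poll_interval=2.0):
    """Block until the CSV import reports a result or failure (sketch only)."""
    while True:
        status = get_async_csv_submission_status(job_uuid)
        # a finished import returns the summary dict with 'additions';
        # a terminal celery state reports SUCCESS or FAILURE in 'job_status'
        if 'additions' in status or \
                status.get('job_status') in ('SUCCESS', 'FAILURE'):
            return status

        time.sleep(poll_interval)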
def get_async_response(job_uuid, request, xform, count=0):
    try:
        job = AsyncResult(job_uuid)
        if job.state == 'SUCCESS':
            export_id = job.result
            export = get_object_or_404(Export, id=export_id)
            resp = _export_async_export_response(request, xform, export)
        else:
            resp = async_status(celery_state_to_status(job.state))

            # append task result to the response
            if job.result:
                if isinstance(job.result, dict):
                    resp.update(job.result)
                else:
                    resp.update({'progress': str(job.result)})
    except ConnectionError as e:
        if count > 0:
            raise ServiceUnavailable(str(e))

        return get_async_response(job_uuid, request, xform, count + 1)
    except BacklogLimitExceeded:
        # most likely still processing
        resp = async_status(celery_state_to_status('PENDING'))

    return resp
def test_async_status(self):
    self.assertEqual(
        async_status.status_msg[async_status.PENDING],
        async_status.async_status(async_status.PENDING).get('job_status'))
    self.assertEqual(
        async_status.status_msg[async_status.SUCCESSFUL],
        async_status.async_status(async_status.SUCCESSFUL).get('job_status'))
    self.assertEqual(
        async_status.status_msg[async_status.FAILED],
        async_status.async_status(async_status.FAILED).get('job_status'))
    self.assertTrue(
        async_status.async_status(
            async_status.FAILED, 'has error').get('error'))
    self.assertFalse(
        async_status.async_status(async_status.SUCCESSFUL).get('error'))
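# A minimal sketch of the `async_status` module exercised by the test above,
# inferred from its usage throughout this file; the real implementation in
# onadata.libs.utils.async_status may differ in its exact codes and messages.
PENDING, SUCCESSFUL, FAILED = 0, 1, 2  # assumed integer state codes

status_msg = {PENDING: 'PENDING', SUCCESSFUL: 'SUCCESS', FAILED: 'FAILURE'}


def async_status(status, error=None):
    """Build the job-status dict; 'error' is set only when a message is given."""
    resp = {'job_status': status_msg[status]}
    if error:
        resp['error'] = error

    return resp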
def failed_import(rollback_uuids, xform, exception, status_message):
    """
    Report a failed import.

    :param rollback_uuids: The rollback UUIDs
    :param xform: The XForm that failed to import to
    :param exception: The exception object
    :param status_message: The message returned in the async_status result
    :return: The async_status result
    """
    Instance.objects.filter(uuid__in=rollback_uuids, xform=xform).delete()
    report_exception(
        'CSV Import Failed : %d - %s - %s' % (xform.pk, xform.id_string,
                                              xform.title), exception,
        sys.exc_info())

    return async_status(FAILED, status_message)
def get_async_response(job_uuid, request, xform, count=0):
    try:
        job = AsyncResult(job_uuid)
        if job.state == 'SUCCESS':
            export_id = job.result
            export = Export.objects.get(id=export_id)
            resp = _export_async_export_response(request, xform, export)
        else:
            resp = async_status(celery_state_to_status(job.state))
    except ConnectionError as e:
        if count > 0:
            raise ServiceUnavailable(str(e))

        return get_async_response(job_uuid, request, xform, count + 1)

    return resp
def _generate_new_export(request, xform, query, export_type,
                         dataview_pk=False):
    query = _set_start_end_params(request, query)
    extension = _get_extension_from_export_type(export_type)

    options = {
        "extension": extension,
        "username": xform.user.username,
        "id_string": xform.id_string,
    }
    if query:
        options['query'] = query

    options["dataview_pk"] = dataview_pk
    if export_type == Export.GOOGLE_SHEETS_EXPORT:
        options['google_credentials'] = \
            _get_google_credential(request).to_json()

    try:
        if export_type == Export.EXTERNAL_EXPORT:
            options['token'] = request.GET.get('token')
            options['data_id'] = request.GET.get('data_id')
            options['meta'] = request.GET.get('meta')

            export = generate_external_export(
                export_type, xform.user.username, xform.id_string, None,
                options, xform=xform)
        elif export_type == Export.OSM_EXPORT:
            export = generate_osm_export(
                export_type, xform.user.username, xform.id_string, None,
                options, xform=xform)
        elif export_type == Export.ZIP_EXPORT:
            export = generate_attachments_zip_export(
                export_type, xform.user.username, xform.id_string, None,
                options, xform=xform)
        elif export_type == Export.KML_EXPORT:
            export = generate_kml_export(
                export_type, xform.user.username, xform.id_string, None,
                options, xform=xform)
        else:
            options.update(parse_request_export_options(request.query_params))
            export = generate_export(export_type, xform, None, options)

        audit = {"xform": xform.id_string, "export_type": export_type}
        log.audit_log(
            log.Actions.EXPORT_CREATED, request.user, xform.user,
            _("Created %(export_type)s export on '%(id_string)s'.") % {
                'id_string': xform.id_string,
                'export_type': export_type.upper()
            }, audit, request)
    except NoRecordsFoundError:
        raise Http404(_("No records found to export"))
    except J2XException as e:
        # j2x exception
        return async_status(FAILED, str(e))
    except SPSSIOError as e:
        raise exceptions.ParseError(str(e))
    else:
        return export
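# Illustrative glue showing how a caller could normalise _generate_new_export's
# two possible return values: an async_status failure dict (on J2XException)
# or an Export instance. The helper name below is hypothetical.
def _new_export_response(request, xform, export_type, query=None):
    export = _generate_new_export(request, xform, query, export_type)
    if isinstance(export, dict):
        # _generate_new_export already produced an async_status failure dict
        return export

    return _export_async_export_response(request, xform, export)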
def submit_csv(username, xform, csv_file, overwrite=False):
    """Imports CSV data to an existing form

    Takes a csv formatted file or string containing rows of submission/
    instance and converts those to xml submissions and finally submits them by
    calling :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (str or file) csv_file: A CSV formatted file with submission rows.
    :return: If successful, a dict with import summary else dict with error
        str.
    :rtype: Dict
    """
    if isinstance(csv_file, str):
        # BytesIO expects bytes, so encode a unicode string first
        csv_file = BytesIO(csv_file.encode('utf-8'))
    elif csv_file is None or not hasattr(csv_file, 'read'):
        return async_status(
            FAILED,
            (u'Invalid param type for `csv_file`. '
             'Expected utf-8 encoded file or unicode'
             ' string got {} instead.'.format(type(csv_file).__name__)))

    num_rows = sum(1 for row in csv_file) - 1
    csv_file.seek(0)

    csv_reader = ucsv.DictReader(csv_file, encoding='utf-8-sig')
    csv_header = csv_reader.fieldnames

    # check for spaces in headers
    if any(' ' in header for header in csv_header):
        return async_status(
            FAILED, u'CSV file fieldnames should not contain spaces')

    # Get the data dictionary
    xform_header = xform.get_headers()

    missing_col = set(xform_header).difference(csv_header)
    addition_col = set(csv_header).difference(xform_header)

    # change to list
    missing_col = list(missing_col)
    addition_col = list(addition_col)

    # remove all metadata columns
    missing = [
        col for col in missing_col
        if not col.startswith("_") and col not in IGNORED_COLUMNS
    ]

    # remove all metadata inside groups
    missing = [col for col in missing if '/_' not in col]

    # ignore if is multiple select question
    for col in csv_header:
        # this col is a multiple select question
        survey_element = xform.get_survey_element(col)
        if survey_element and \
                survey_element.get('type') == MULTIPLE_SELECT_TYPE:
            # remove from the missing and additional list
            missing = [x for x in missing if not x.startswith(col)]
            addition_col.remove(col)

    # remove headers for repeats that might be missing from csv
    missing = sorted([m for m in missing if m.find('[') == -1])

    # Include additional repeats
    addition_col = [a for a in addition_col if a.find('[') == -1]

    if missing:
        return async_status(
            FAILED,
            u"Sorry uploaded file does not match the form. "
            u"The file is missing the column(s): "
            u"{0}.".format(', '.join(missing)))

    if overwrite:
        xform.instances.filter(deleted_at__isnull=True)\
            .update(deleted_at=timezone.now(),
                    deleted_by=User.objects.get(username=username))

    rollback_uuids = []
    submission_time = datetime.utcnow().isoformat()
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    error = None
    additions = duplicates = inserts = 0
    try:
        for row in csv_reader:
            # remove the additional columns
            for index in addition_col:
                del row[index]

            # fetch submission uuid before purging row metadata
            row_uuid = row.get('meta/instanceID') or row.get('_uuid')
            submitted_by = row.get('_submitted_by')
            submission_date = row.get('_submission_time', submission_time)

            location_data = {}
            for key in list(row):  # seems faster than a comprehension
                # remove metadata (keys starting with '_')
                if key.startswith('_'):
                    del row[key]

                # Collect row location data into separate location_data dict
                if key.endswith(('.latitude', '.longitude', '.altitude',
                                 '.precision')):
                    location_key, location_prop = key.rsplit(u'.', 1)
                    location_data.setdefault(location_key, {}).update(
                        {location_prop: row.get(key, '0')})

                # remove 'n/a' values
                if not key.startswith('_') and row[key] == 'n/a':
                    del row[key]

            # collect all location K-V pairs into single geopoint field(s)
            # in location_data dict
            for location_key in list(location_data):
                location_data.update({
                    location_key:
                    (u'%(latitude)s %(longitude)s '
                     '%(altitude)s %(precision)s') %
                    defaultdict(lambda: '', location_data.get(location_key))
                })

            row = csv_dict_to_nested_dict(row)
            location_data = csv_dict_to_nested_dict(location_data)

            row = dict_merge(row, location_data)

            # inject our form's uuid into the submission
            row.update(ona_uuid)

            old_meta = row.get('meta', {})
            new_meta, update = get_submission_meta_dict(xform, row_uuid)
            inserts += update
            old_meta.update(new_meta)
            row.update({'meta': old_meta})

            row_uuid = row.get('meta').get('instanceID')
            rollback_uuids.append(row_uuid.replace('uuid:', ''))

            xml_file = BytesIO(
                dict2xmlsubmission(row, xform, row_uuid, submission_date))

            try:
                error, instance = safe_create_instance(username, xml_file, [],
                                                       xform.uuid, None)
            except ValueError as e:
                error = e

            if error:
                if not (isinstance(error, OpenRosaResponse)
                        and error.status_code == 202):
                    Instance.objects.filter(uuid__in=rollback_uuids,
                                            xform=xform).delete()
                    return async_status(FAILED, text(error))
                else:
                    duplicates += 1
            else:
                additions += 1

                if additions % PROGRESS_BATCH_UPDATE == 0:
                    try:
                        current_task.update_state(
                            state='PROGRESS',
                            meta={
                                'progress': additions,
                                'total': num_rows,
                                'info': addition_col
                            })
                    except Exception:
                        logging.exception(
                            _(u'Could not update state of '
                              'import CSV batch process.'))
                    finally:
                        xform.submission_count(True)

                users = User.objects.filter(
                    username=submitted_by) if submitted_by else []
                if users:
                    instance.user = users[0]
                    instance.save()
    except UnicodeDecodeError as e:
        return failed_import(rollback_uuids, xform, e,
                             u'CSV file must be utf-8 encoded')
    except Exception as e:
        return failed_import(rollback_uuids, xform, e, text(e))
    finally:
        xform.submission_count(True)

    return {
        "additions": additions - inserts,
        "duplicates": duplicates,
        u"updates": inserts,
        u"info": u"Additional column(s) excluded from the upload: '{0}'."
        .format(', '.join(list(addition_col)))
    }  # yapf: disable
def submit_csv(username, xform, csv_file, overwrite=False):
    """Imports CSV data to an existing form

    Takes a csv formatted file or string containing rows of submission/
    instance and converts those to xml submissions and finally submits them by
    calling :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (str or file) csv_file: A CSV formatted file with submission rows.
    :return: If successful, a dict with import summary else dict with error
        str.
    :rtype: Dict
    """
    csv_file_validation_summary = validate_csv_file(csv_file, xform)

    if csv_file_validation_summary.get('valid'):
        additional_col = csv_file_validation_summary.get('additional_col')
    else:
        return async_status(
            FAILED,
            csv_file_validation_summary.get('error_msg')
        )

    num_rows = sum(1 for row in csv_file) - 1

    # Change stream position to start of file
    csv_file.seek(0)
    csv_reader = ucsv.DictReader(csv_file, encoding='utf-8-sig')
    xform_json = json.loads(xform.json)
    select_multiples = [
        qstn.name for qstn in
        xform.get_survey_elements_of_type(MULTIPLE_SELECT_TYPE)]
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    additions = duplicates = inserts = 0
    rollback_uuids = []
    errors = {}

    # Retrieve the columns we should validate values for
    # Currently validating date, datetime, integer and decimal columns
    col_to_validate = {
        'date': (get_columns_by_type(XLS_DATE_FIELDS, xform_json), parse),
        'datetime': (
            get_columns_by_type(XLS_DATETIME_FIELDS, xform_json), parse),
        'integer': (get_columns_by_type(['integer'], xform_json), int),
        'decimal': (get_columns_by_type(['decimal'], xform_json), float)
    }

    if overwrite:
        instance_ids = [i['id'] for i in xform.instances.values('id')]
        xform.instances.filter(deleted_at__isnull=True)\
            .update(deleted_at=timezone.now(),
                    deleted_by=User.objects.get(username=username))
        # send message
        send_message(
            instance_id=instance_ids, target_id=xform.id,
            target_type=XFORM, user=User.objects.get(username=username),
            message_verb=SUBMISSION_DELETED)

    try:
        for row_no, row in enumerate(csv_reader):
            # Remove additional columns
            for index in additional_col:
                del row[index]

            # Remove 'n/a' and '' values from csv
            row = {k: v for (k, v) in row.items() if v not in [NA_REP, '']}

            row, error = validate_row(row, col_to_validate)

            if error:
                errors[row_no] = error

            # Only continue the process if no errors were encountered while
            # validating the data
            if not errors:
                location_data = {}

                for key in list(row):
                    # Collect row location data into separate location_data
                    # dict
                    if key.endswith(('.latitude', '.longitude', '.altitude',
                                     '.precision')):
                        location_key, location_prop = key.rsplit(u'.', 1)
                        location_data.setdefault(location_key, {}).update({
                            location_prop: row.get(key, '0')
                        })

                # collect all location K-V pairs into single geopoint field(s)
                # in location_data dict
                for location_key in list(location_data):
                    location_data.update({
                        location_key:
                        (u'%(latitude)s %(longitude)s '
                         '%(altitude)s %(precision)s') % defaultdict(
                             lambda: '', location_data.get(location_key))
                    })

                nested_dict = csv_dict_to_nested_dict(
                    row, select_multiples=select_multiples)
                row = flatten_split_select_multiples(
                    nested_dict, select_multiples=select_multiples)
                location_data = csv_dict_to_nested_dict(location_data)
                # Merge location_data into the Row data
                row = dict_merge(row, location_data)

                submission_time = datetime.utcnow().isoformat()
                # prefer meta/instanceID, fall back to the _uuid column
                row_uuid = row.get('meta/instanceID') or (
                    'uuid:{}'.format(row.get(UUID))
                    if row.get(UUID) else None)
                submitted_by = row.get('_submitted_by')
                submission_date = row.get('_submission_time', submission_time)

                for key in list(row):
                    # remove metadata (keys starting with '_')
                    if key.startswith('_'):
                        del row[key]

                # Inject our form's uuid into the submission
                row.update(ona_uuid)

                old_meta = row.get('meta', {})
                new_meta, update = get_submission_meta_dict(xform, row_uuid)
                inserts += update
                old_meta.update(new_meta)
                row.update({'meta': old_meta})

                row_uuid = row.get('meta').get('instanceID')
                rollback_uuids.append(row_uuid.replace('uuid:', ''))

                try:
                    xml_file = BytesIO(
                        dict2xmlsubmission(
                            row, xform, row_uuid, submission_date))

                    try:
                        error, instance = safe_create_instance(
                            username, xml_file, [], xform.uuid, None)
                    except ValueError as e:
                        error = e

                    if error:
                        if not (isinstance(error, OpenRosaResponse)
                                and error.status_code == 202):
                            Instance.objects.filter(
                                uuid__in=rollback_uuids,
                                xform=xform).delete()
                            return async_status(FAILED, text(error))
                        else:
                            duplicates += 1
                    else:
                        additions += 1

                        if additions % PROGRESS_BATCH_UPDATE == 0:
                            try:
                                current_task.update_state(
                                    state='PROGRESS',
                                    meta={
                                        'progress': additions,
                                        'total': num_rows,
                                        'info': additional_col
                                    })
                            except Exception:
                                logging.exception(
                                    _(u'Could not update state of '
                                      'import CSV batch process.'))
                            finally:
                                xform.submission_count(True)

                        users = User.objects.filter(
                            username=submitted_by) if submitted_by else []
                        if users:
                            instance.user = users[0]
                            instance.save()
                except Exception as e:
                    return failed_import(rollback_uuids, xform, e, text(e))
    except UnicodeDecodeError as e:
        return failed_import(rollback_uuids, xform, e,
                             'CSV file must be utf-8 encoded')

    if errors:
        # Rollback all created instances if an error occurred during
        # validation
        Instance.objects.filter(uuid__in=rollback_uuids, xform=xform).delete()
        xform.submission_count(True)

        return async_status(
            FAILED,
            u'Invalid CSV data imported in row(s): {}'.format(errors))
    else:
        added_submissions = additions - inserts
        event_by = User.objects.get(username=username)
        event_name = None
        tracking_properties = {
            'xform_id': xform.pk,
            'project_id': xform.project.pk,
            'submitted_by': event_by,
            'label': f'csv-import-for-form-{xform.pk}',
            'from': 'CSV Import',
        }
        if added_submissions > 0:
            tracking_properties['value'] = added_submissions
            event_name = INSTANCE_CREATE_EVENT
            analytics.track(
                event_by, event_name, properties=tracking_properties)

        if inserts > 0:
            tracking_properties['value'] = inserts
            event_name = INSTANCE_UPDATE_EVENT
            analytics.track(
                event_by, event_name, properties=tracking_properties)

        return {
            'additions': added_submissions,
            'duplicates': duplicates,
            'updates': inserts,
            'info': "Additional column(s) excluded from the upload: '{0}'."
                    .format(', '.join(list(additional_col)))}
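# A hypothetical Celery entry point matching the `_submit_csv.delay` reference
# in the docstrings above; argument names and the file-based hand-off are
# assumptions for this sketch, not onadata's actual task signature.
from celery import shared_task

from onadata.apps.logger.models import XForm


@shared_task
def _submit_csv(username, xform_id, csv_filename, overwrite=False):
    """Run submit_csv in the background for the given form (sketch only)."""
    xform = XForm.objects.get(pk=xform_id)
    # ucsv.DictReader(..., encoding='utf-8-sig') reads a binary stream
    with open(csv_filename, 'rb') as csv_file:
        return submit_csv(username, xform, csv_file, overwrite=overwrite)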
def response_for_format(data, format=None):
    if format == 'xml':
        formatted_data = data.xml
    elif format == 'xls':
        if not data.xls:
            raise Http404()

        formatted_data = data.xls
    else:
        formatted_data = json.loads(data.json)
    return Response(formatted_data)