def test_check_encryption_status(self):
    """
    Test that the encryption status of a submission is checked and
    unencrypted submissions are rejected when made to encrypted forms.
    """
    # Publish an encrypted form fixture and bind it to self.xform.
    form_path = (f"{settings.PROJECT_ROOT}/libs/tests/"
                 "fixtures/tutorial/tutorial_encrypted.xlsx")
    self._publish_xls_file_and_set_xform(form_path)
    # A plaintext (unencrypted) submission payload targeting the
    # encrypted form — the server must refuse it.
    instance_xml = f""" <data xmlns:jr="http://openrosa.org/javarosa" xmlns:orx="http://openrosa.org/xforms" id="{self.xform.id_string}" version="{self.xform.version}"> <name>Bob</name> <age>20</age> <picture/> <has_children>0</has_children> <gps/> <web_browsers>firefox chrome safari</web_browsers> <meta> <instanceID>uuid:332f956b-b923-4f88-899d-849485ae66d0</instanceID> </meta> </data> """  # noqa
    req = HttpRequest()
    req.user = self.user
    # safe_create_instance returns a (error_response, instance) pair.
    ret = safe_create_instance(
        self.user.username,
        BytesIO(instance_xml.strip().encode('utf-8')), [], None, req)
    response = ret[0]
    expected_error = ("Unencrypted submissions are not allowed"
                      " for encrypted forms.")
    self.assertIsNone(ret[1])  # no Instance should have been created
    self.assertEqual(response.status_code, 400)
    self.assertIn(expected_error, str(response.content))
    # Test incorrectly formatted encrypted submission is rejected
    instance_xml = f""" <data id="{self.xform.id_string}" version="{self.xform.version}" encrypted="yes" xmlns="http://www.opendatakit.org/xforms/encrypted"> <orx:meta xmlns:orx="http://openrosa.org/xforms"> <orx:instanceID>uuid:6850c987-fcd6-4469-a843-7ce200af00e2</orx:instanceID> </orx:meta>\n<encryptedXmlFile>submission.xml.enc</encryptedXmlFile> <base64EncryptedElementSignature>PfYw8EIFutyhT03rdOf6rT/1FuETsOHbcnIOJdB9qBre7BWGu0k4fRUpv3QdyTil9wCez64MyOXbsHzFyTcazAkBmBPKuqiK7k3dws57rRuJEpmLjOtniQoAuTaXnAlTwp2x6KEvLt9Kqfa8kD8cFvwsRBs8rvkolAl33UAuNjzO7j9h0N94R9syqc6jNR5gGGaG74KlhYvAZnekoPXGb3MjZMDqjCSnYdiPz8iVOUsPBvuitzYIqGdfe1sW8EkQBOp0ACsD31EQ03iWyb8Mg5JSTCdz7T+qdtd0R65EjQ4ZTpDv72/owocteXVV6dCKi564YFXbiwpdkzf80B+QoQ==</base64EncryptedElementSignature> </data> """  # noqa
    ret = safe_create_instance(
        self.user.username,
        BytesIO(instance_xml.strip().encode('utf-8')), [], None, req)
    response = ret[0]
    expected_error = ("Encrypted submission incorrectly formatted.")
    self.assertIsNone(ret[1])
    self.assertEqual(response.status_code, 400)
    self.assertIn(expected_error, str(response.content))
def create_instance_from_xml(username, request):
    """Create an Instance from the ``xml_submission_file`` upload.

    Pops the XML submission out of ``request.FILES`` and forwards it,
    together with any remaining uploads as media attachments, to
    ``safe_create_instance``. Returns that function's result unchanged.
    """
    submission_files = request.FILES.pop('xml_submission_file', [])
    if len(submission_files):
        submission_xml = submission_files[0]
    else:
        submission_xml = None
    attachments = request.FILES.values()
    return safe_create_instance(
        username, submission_xml, attachments, None, request)
def test_corrupted_submission(self):
    """Test xml submissions that contain unicode characters.
    """
    # Raw byte soup that is not valid XML — simulates a payload mangled
    # in transit; kept verbatim from a captured corrupted upload.
    xml = 'v\xee\xf3\xc0k\x91\x91\xae\xff\xff\xff\xff\xcf[$b\xd0\xc9\'uW\x80RP\xff\xff\xff\xff7\xd0\x03%F\xa7p\xa2\x87\xb6f\xb1\xff\xff\xff\xffg~\xf3O\xf3\x9b\xbc\xf6ej_$\xff\xff\xff\xff\x13\xe8\xa9D\xed\xfb\xe7\xa4d\x96>\xfa\xff\xff\xff\xff\xc7h"\x86\x14\\.\xdb\x8aoF\xa4\xff\xff\xff\xff\xcez\xff\x01\x0c\x9a\x94\x18\xe1\x03\x8e\xfa\xff\xff\xff\xff39P|\xf9n\x18F\xb1\xcb\xacd\xff\xff\xff\xff\xce>\x97i;1u\xcfI*\xf2\x8e\xff\xff\xff\xffFg\x9d\x0fR:\xcd*\x14\x85\xf0e\xff\xff\xff\xff\xd6\xdc\xda\x8eM\x06\xf1\xfc\xc1\xe8\xd6\xe0\xff\xff\xff\xff\xe7G\xe1\xa1l\x02T\n\xde\x1boJ\xff\xff\xff\xffz \x92\xbc\tR{#\xbb\x9f\xa6s\xff\xff\xff\xff\xa2\x8f(\xb6=\xe11\xfcV\xcf\xef\x0b\xff\xff\xff\xff\xa3\x83\x7ft\xd7\x05+)\xeb9\\*\xff\xff\xff\xff\xfe\x93\xb2\xa2\x06n;\x1b4\xaf\xa6\x93\xff\xff\xff\xff\xe7\xf7\x12Q\x83\xbb\x9a\xc8\xc8q34\xff\xff\xff\xffT2\xa5\x07\x9a\xc9\x89\xf8\x14Y\xab\x19\xff\xff\xff\xff\x16\xd0R\x1d\x06B\x95\xea\\\x1ftP\xff\xff\xff\xff\x94^\'\x01#oYV\xc5\\\xb7@\xff\xff\xff\xff !\x11\x00\x8b\xf3[\xde\xa2\x01\x9dl\xff\xff\xff\xff\xe7z\x92\xc3\x03\xd3\xb5B5 \xaa7\xff\xff\xff\xff\xff\xc3Q:\xa6\xb3\xa3\x1e\x90 \xa0\\\xff\xff\xff\xff\xff\x14<\x03Vr\xe8Z.Ql\xf5\xff\xff\xff\xffEx\xf7\x0b_\xa1\x7f\xfcG\xa4\x18\xcd\xff\xff\xff\xff1|~i\x00\xb3. ,1Q\x0e\xff\xff\xff\xff\x87a\x933Y\xd7\xe1B#\xa7a\xee\xff\xff\xff\xff\r\tJ\x18\xd0\xdb\x0b\xbe\x00\x91\x95\x9e\xff\xff\xff\xffHfW\xcd\x8f\xa9z6|\xc5\x171\xff\xff\xff\xff\xf5tP7\x93\x02Q|x\x17\xb1\xcb\xff\xff\xff\xffVb\x11\xa0*\xd9;\x0b\xf8\x1c\xd3c\xff\xff\xff\xff\x84\x82\xcer\x15\x99`5LmA\xd5\xff\xff\xff\xfft\xce\x8e\xcbw\xee\xf3\xc0w\xca\xb3\xfd\xff\xff\xff\xff\xb0\xaab\x92\xd4\x02\x84H3\x94\xa9~\xff\xff\xff\xff\xfe7\x18\xcaW=\x94\xbc|\x0f{\x84\xff\xff\xff\xff\xe8\xdf\xde?\x8b\xb7\x9dH3\xc1\xf2\xaa\xff\xff\xff\xff\xbe\x00\xba\xd7\xba6!\x95g\xb01\xf9\xff\xff\xff\xff\x93\xe3\x90YH9g\xf7\x97nhv\xff\xff\xff\xff\x82\xc7`\xaebn\x9d\x1e}\xba\x1e/\xff\xff\xff\xff\xbd\xe5\xa1\x05\x03\xf26\xa0\xe2\xc1*\x07\xff\xff\xff\xffny\x88\x9f\x19\xd2\xd0\xf7\x1de\xa7\xe0\xff\xff\xff\xff\xc4O&\x14\x8dVH\x90\x8b+\x03\xf9\xff\xff\xff\xff\xf69\xc2\xabo%\xcc/\xc9\xe4dP\xff\xff\xff\xff (\x08G\xebM\x03\x99Y\xb4\xb3\x1f\xff\xff\xff\xffzH\xd2\x19p#\xc5\xa4)\xfd\x05\x9a\xff\xff\xff\xffd\x86\xb2F\x15\x0f\xf4.\xfd\\\xd4#\xff\xff\xff\xff\xaf\xbe\xc6\x9di\xa0\xbc\xd5>cp\xe2\xff\xff\xff\xff&h\x91\xe9\xa0H\xdd\xaer\x87\x18E\xff\xff\xff\xffjg\x08E\x8f\xa4&\xab\xff\x98\x0ei\xff\xff\xff\xff\x01\xfd{"\xed\\\xa3M\x9e\xc3\xf8K\xff\xff\xff\xff\x87Y\x98T\xf0\xa6\xec\x98\xb3\xef\xa7\xaa\xff\xff\xff\xffA\xced\xfal\xd3\xd9\x06\xc6~\xee}\xff\xff\xff\xff:\x7f\xa2\x10\xc7\xadB,}PF%\xff\xff\xff\xff\xb2\xbc\n\x17%\x98\x904\x89\tF\x1f\xff\xff\xff\xff\xdc\xd8\xc6@#M\x87uf\x02\xc6g\xff\xff\xff\xffK\xaf\xb0-=l\x07\xe1Nv\xe4\xf4\xff\xff\xff\xff\xdb\x13\'Ne\xb2UT\x9a#\xb1^\xff\xff\xff\xff\xb2\rne\xd1\x9d\x88\xda\xbb!\xfa@\xff\xff\xff\xffflq\x0f\x01z]uh\'|?\xff\xff\xff\xff\xd5\'\x19\x865\xba\xf2\xe7\x8fR-\xcc\xff\xff\xff\xff\xce\xd6\xfdi\x04\x9b\xa7\tu\x05\xb7\xc8\xff\xff\xff\xff\xc3\xd0)\x11\xdd\xb1\xa5kp\xc9\xd5\xf7\xff\xff\xff\xff\xffU\x9f \xb7\xa1#3rup[\xff\xff\xff\xff\xfc='  # noqa
    request = RequestFactory().post('/')
    request.user = self.user
    # The server should reject the garbage payload with a 400 that
    # mentions likely corruption, rather than raising an exception.
    error, instance = safe_create_instance(
        self.user.username, TempFileProxy(xml), None, None, request)
    text = 'File likely corrupted during transmission'
    self.assertContains(error, text, status_code=400)
def test_corrupted_submission(self):
    """Test xml submissions that contain unicode characters.
    """
    # Raw byte soup that is not valid XML — simulates a payload mangled
    # in transit; kept verbatim from a captured corrupted upload.
    xml = 'v\xee\xf3\xc0k\x91\x91\xae\xff\xff\xff\xff\xcf[$b\xd0\xc9\'uW\x80RP\xff\xff\xff\xff7\xd0\x03%F\xa7p\xa2\x87\xb6f\xb1\xff\xff\xff\xffg~\xf3O\xf3\x9b\xbc\xf6ej_$\xff\xff\xff\xff\x13\xe8\xa9D\xed\xfb\xe7\xa4d\x96>\xfa\xff\xff\xff\xff\xc7h"\x86\x14\\.\xdb\x8aoF\xa4\xff\xff\xff\xff\xcez\xff\x01\x0c\x9a\x94\x18\xe1\x03\x8e\xfa\xff\xff\xff\xff39P|\xf9n\x18F\xb1\xcb\xacd\xff\xff\xff\xff\xce>\x97i;1u\xcfI*\xf2\x8e\xff\xff\xff\xffFg\x9d\x0fR:\xcd*\x14\x85\xf0e\xff\xff\xff\xff\xd6\xdc\xda\x8eM\x06\xf1\xfc\xc1\xe8\xd6\xe0\xff\xff\xff\xff\xe7G\xe1\xa1l\x02T\n\xde\x1boJ\xff\xff\xff\xffz \x92\xbc\tR{#\xbb\x9f\xa6s\xff\xff\xff\xff\xa2\x8f(\xb6=\xe11\xfcV\xcf\xef\x0b\xff\xff\xff\xff\xa3\x83\x7ft\xd7\x05+)\xeb9\\*\xff\xff\xff\xff\xfe\x93\xb2\xa2\x06n;\x1b4\xaf\xa6\x93\xff\xff\xff\xff\xe7\xf7\x12Q\x83\xbb\x9a\xc8\xc8q34\xff\xff\xff\xffT2\xa5\x07\x9a\xc9\x89\xf8\x14Y\xab\x19\xff\xff\xff\xff\x16\xd0R\x1d\x06B\x95\xea\\\x1ftP\xff\xff\xff\xff\x94^\'\x01#oYV\xc5\\\xb7@\xff\xff\xff\xff !\x11\x00\x8b\xf3[\xde\xa2\x01\x9dl\xff\xff\xff\xff\xe7z\x92\xc3\x03\xd3\xb5B5 \xaa7\xff\xff\xff\xff\xff\xc3Q:\xa6\xb3\xa3\x1e\x90 \xa0\\\xff\xff\xff\xff\xff\x14<\x03Vr\xe8Z.Ql\xf5\xff\xff\xff\xffEx\xf7\x0b_\xa1\x7f\xfcG\xa4\x18\xcd\xff\xff\xff\xff1|~i\x00\xb3. ,1Q\x0e\xff\xff\xff\xff\x87a\x933Y\xd7\xe1B#\xa7a\xee\xff\xff\xff\xff\r\tJ\x18\xd0\xdb\x0b\xbe\x00\x91\x95\x9e\xff\xff\xff\xffHfW\xcd\x8f\xa9z6|\xc5\x171\xff\xff\xff\xff\xf5tP7\x93\x02Q|x\x17\xb1\xcb\xff\xff\xff\xffVb\x11\xa0*\xd9;\x0b\xf8\x1c\xd3c\xff\xff\xff\xff\x84\x82\xcer\x15\x99`5LmA\xd5\xff\xff\xff\xfft\xce\x8e\xcbw\xee\xf3\xc0w\xca\xb3\xfd\xff\xff\xff\xff\xb0\xaab\x92\xd4\x02\x84H3\x94\xa9~\xff\xff\xff\xff\xfe7\x18\xcaW=\x94\xbc|\x0f{\x84\xff\xff\xff\xff\xe8\xdf\xde?\x8b\xb7\x9dH3\xc1\xf2\xaa\xff\xff\xff\xff\xbe\x00\xba\xd7\xba6!\x95g\xb01\xf9\xff\xff\xff\xff\x93\xe3\x90YH9g\xf7\x97nhv\xff\xff\xff\xff\x82\xc7`\xaebn\x9d\x1e}\xba\x1e/\xff\xff\xff\xff\xbd\xe5\xa1\x05\x03\xf26\xa0\xe2\xc1*\x07\xff\xff\xff\xffny\x88\x9f\x19\xd2\xd0\xf7\x1de\xa7\xe0\xff\xff\xff\xff\xc4O&\x14\x8dVH\x90\x8b+\x03\xf9\xff\xff\xff\xff\xf69\xc2\xabo%\xcc/\xc9\xe4dP\xff\xff\xff\xff (\x08G\xebM\x03\x99Y\xb4\xb3\x1f\xff\xff\xff\xffzH\xd2\x19p#\xc5\xa4)\xfd\x05\x9a\xff\xff\xff\xffd\x86\xb2F\x15\x0f\xf4.\xfd\\\xd4#\xff\xff\xff\xff\xaf\xbe\xc6\x9di\xa0\xbc\xd5>cp\xe2\xff\xff\xff\xff&h\x91\xe9\xa0H\xdd\xaer\x87\x18E\xff\xff\xff\xffjg\x08E\x8f\xa4&\xab\xff\x98\x0ei\xff\xff\xff\xff\x01\xfd{"\xed\\\xa3M\x9e\xc3\xf8K\xff\xff\xff\xff\x87Y\x98T\xf0\xa6\xec\x98\xb3\xef\xa7\xaa\xff\xff\xff\xffA\xced\xfal\xd3\xd9\x06\xc6~\xee}\xff\xff\xff\xff:\x7f\xa2\x10\xc7\xadB,}PF%\xff\xff\xff\xff\xb2\xbc\n\x17%\x98\x904\x89\tF\x1f\xff\xff\xff\xff\xdc\xd8\xc6@#M\x87uf\x02\xc6g\xff\xff\xff\xffK\xaf\xb0-=l\x07\xe1Nv\xe4\xf4\xff\xff\xff\xff\xdb\x13\'Ne\xb2UT\x9a#\xb1^\xff\xff\xff\xff\xb2\rne\xd1\x9d\x88\xda\xbb!\xfa@\xff\xff\xff\xffflq\x0f\x01z]uh\'|?\xff\xff\xff\xff\xd5\'\x19\x865\xba\xf2\xe7\x8fR-\xcc\xff\xff\xff\xff\xce\xd6\xfdi\x04\x9b\xa7\tu\x05\xb7\xc8\xff\xff\xff\xff\xc3\xd0)\x11\xdd\xb1\xa5kp\xc9\xd5\xf7\xff\xff\xff\xff\xffU\x9f \xb7\xa1#3rup[\xff\xff\xff\xff\xfc='  # noqa
    request = RequestFactory().post('/')
    request.user = self.user
    # The server should reject the garbage payload with a 400 that
    # mentions likely corruption, rather than raising an exception.
    error, instance = safe_create_instance(self.user.username,
                                           TempFileProxy(xml), None, None,
                                           request)
    text = 'File likely corrupted during transmission'
    self.assertContains(error, text, status_code=400)
def create_submission(request, username, data_dict, xform_id):
    """
    Build an XML submission from ``data_dict`` and create an Instance.

    Raises ``serializers.ValidationError`` when creation fails; otherwise
    returns the created instance.
    """
    # Serialize the dict to submission XML, then hand it over as a
    # binary stream the way the submission API expects.
    submission_xml = dict2xform(data_dict, xform_id)
    xml_stream = BytesIO(submission_xml.encode('utf-8'))
    error, instance = safe_create_instance(
        username, xml_stream, [], None, request)
    if error is not None:
        raise serializers.ValidationError(error.message)
    return instance
def create_submission(request, username, data_dict, xform_id):
    """
    Returns validated data object instances

    :param request: HTTP request the submission is attributed to.
    :param str username: the submitting user's username.
    :param dict data_dict: submission data to convert to XML.
    :param xform_id: id_string of the target XForm.
    :raises serializers.ValidationError: if submission creation fails.
    """
    from io import BytesIO  # local import keeps this fix self-contained

    xml_string = dict2xform(data_dict, xform_id)
    # FIX: `StringIO.StringIO` is the Python-2 module API and fails on
    # Python 3; encode and wrap in BytesIO so the payload is a binary
    # file object, matching the other create_submission variants.
    xml_file = BytesIO(xml_string.encode('utf-8'))
    error, instance = safe_create_instance(username, xml_file, [], None,
                                           request)
    if error:
        raise serializers.ValidationError(error.message)
    return instance
def create_submission(
        request, username, data_dict, xform_id, gen_uuid: bool = False):
    """
    Returns validated data object instances

    When ``gen_uuid`` is true a fresh uuid is generated for the
    submission; otherwise the data is used as-is.
    """
    serialized = dict2xform(
        data_dict, xform_id, username=username, gen_uuid=gen_uuid)
    payload = BytesIO(serialized.encode('utf-8'))
    error, instance = safe_create_instance(
        username, payload, [], None, request)
    # A non-None error response means the submission was rejected.
    if error is not None:
        raise serializers.ValidationError(error.message)
    return instance
def create(self, validated_data):
    """
    Create submissions from parsed flow-results responses.

    Converts each parsed response to submission XML and creates an
    Instance for it; raises ``serializers.ValidationError`` on the first
    failure. Returns a ``FlowResultsResponse`` for the target form.
    """
    request = self.context['request']
    responses = validated_data['responses']
    xform = get_object_or_404(XForm, uuid=validated_data['id'])
    processed = []
    for submission in parse_responses(responses):
        xml_file = BytesIO(dict2xform(
            submission, xform.id_string, 'data').encode('utf-8'))
        error, instance = safe_create_instance(
            request.user.username, xml_file, [], None, request)
        # FIX: check the error before touching `instance`. On failure
        # safe_create_instance returns (error, None), so the previous
        # order raised AttributeError on `instance.pk` and masked the
        # intended ValidationError.
        if error:
            raise serializers.ValidationError(error)
        processed.append(instance.pk)
    return FlowResultsResponse(xform.uuid, responses)
def create_instance_from_json(username, request):
    """Create an Instance from a JSON submission payload in ``request.data``."""
    # Force JSON rendering for whatever response is produced downstream.
    request.accepted_renderer = JSONRenderer()
    request.accepted_media_type = JSONRenderer.media_type
    payload = request.data
    submission = payload.get('submission')
    if submission is None:
        # return an error
        return [t("No submission key provided."), None]
    # convert lists in submission dict to joined strings
    flattened = dict_lists2strings(submission)
    xml_string = dict2xform(flattened, payload.get('id'))
    return safe_create_instance(
        username, io.StringIO(xml_string), [], None, request)
def create_instance_from_json(username, request):
    # Create an Instance from a JSON submission payload in ``request.data``.
    # Force JSON rendering for whatever response is produced downstream.
    request.accepted_renderer = JSONRenderer()
    request.accepted_media_type = JSONRenderer.media_type
    dict_form = request.data
    submission = dict_form.get('submission')
    if submission is None:
        # return an error
        return [_(u"No submission key provided."), None]
    # convert lists in submission dict to joined strings
    submission_joined = dict_lists2strings(submission)
    xml_string = dict2xform(submission_joined, dict_form.get('id'))
    # NOTE(review): StringIO.StringIO is the Python-2 module API; this
    # variant appears to predate the io.StringIO port — confirm the
    # target runtime before reuse.
    xml_file = StringIO.StringIO(xml_string)
    return safe_create_instance(username, xml_file, [], None, request)
def create(self, validated_data):
    """
    Submit parsed flow-results responses to the target form.

    A 202 response from ``safe_create_instance`` marks a duplicate and is
    counted rather than treated as a failure; any other error aborts with
    a ``serializers.ValidationError``.
    """
    duplicates = 0
    request = self.context['request']
    responses = validated_data['responses']
    xform = get_object_or_404(XForm, uuid=validated_data['id'],
                              deleted_at__isnull=True)
    for submission in parse_responses(responses):
        payload = dict2xform(submission, xform.id_string, 'data')
        xml_file = BytesIO(payload.encode('utf-8'))
        error, _instance = safe_create_instance(
            request.user.username, xml_file, [], None, request)
        if error:
            if error.status_code == 202:
                # Duplicate submission — tally it and keep going.
                duplicates += 1
            else:
                raise serializers.ValidationError(error)
    return FlowResultsResponse(xform.uuid, responses, duplicates)
def create(self, validated_data):
    """
    Returns object instances based on the validated data

    Pulls the XML submission file and media attachments out of the
    request and creates the Instance; raises an ``APIException``
    carrying the error response when creation fails.
    """
    request, username = get_request_and_username(self.context)
    submission_files = request.FILES.pop('xml_submission_file', [])
    xml_file = submission_files[0] if submission_files else None
    media_files = request.FILES.values()
    error, instance = safe_create_instance(
        username, xml_file, media_files, None, request)
    if error:
        # Surface the error response through DRF's exception machinery,
        # preserving its status code for the client.
        exc = exceptions.APIException(detail=error)
        exc.response = error
        exc.status_code = error.status_code
        raise exc
    return instance
def submit_csv(
        request: 'django.http.HttpRequest',
        xform: 'onadata.apps.logger.models.XForm',
        csv_file: Union[str, TextIO],
) -> dict:
    """
    Imports CSV data to an existing form

    Takes a csv formatted file or string containing rows of
    submission/instance and converts those to xml submissions and finally
    submits them by calling
    :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`

    Returns ``{'additions': ..., 'updates': ...}`` on success or
    ``{'error': ...}`` on the first failure (previously created
    instances for this import are rolled back by uuid).
    """
    if isinstance(csv_file, str):
        csv_file = io.StringIO(csv_file)
    elif csv_file is None or not hasattr(csv_file, 'read'):
        return {
            'error': ('Invalid param type for `csv_file`. '
                      'Expected file or String '
                      'got {} instead.'.format(type(csv_file).__name__))
        }
    csv_reader = ucsv.DictReader(csv_file)
    # uuids of instances created by this import, for rollback on error
    rollback_uuids = []
    submission_time = datetime.utcnow().isoformat()
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    error = None
    additions = inserts = 0
    for row in csv_reader:
        # fetch submission uuid before purging row metadata
        row_uuid = row.get('_uuid')
        submission_date = row.get('_submission_time', submission_time)
        # iterate over a copy of the keys so `row` can be mutated safely
        row_iter = dict(row)
        for key in row_iter:  # seems faster than a comprehension
            # remove metadata (keys starting with '_')
            if key.startswith('_'):
                del row[key]
            # process nested data e.g x[formhub/uuid] => x[formhub][uuid]
            # NOTE(review): a key that both starts with '_' and contains
            # '/' would hit row[key] after deletion — confirm such keys
            # cannot occur in practice.
            if r'/' in key:
                p, c = key.split('/')
                row[p] = {c: row[key]}
                del row[key]
        # inject our form's uuid into the submission
        row.update(ona_uuid)
        old_meta = row.get('meta', {})
        new_meta, update = get_submission_meta_dict(xform, row_uuid)
        # `update` is truthy when this row updates an existing instance
        inserts += update
        old_meta.update(new_meta)
        row.update({'meta': old_meta})
        row_uuid = row.get('meta').get('instanceID')
        rollback_uuids.append(row_uuid.replace('uuid:', ''))
        xml_file = io.StringIO(
            dict2xmlsubmission(row, xform, row_uuid, submission_date))
        try:
            error, instance = safe_create_instance(request.user.username,
                                                   xml_file, [],
                                                   xform.uuid, request)
        except ValueError as e:
            error = e
        if error:
            # roll back everything created so far for this import
            Instance.objects.filter(uuid__in=rollback_uuids,
                                    xform=xform).delete()
            return {'error': str(error)}
        else:
            additions += 1
    return {'additions': additions - inserts, 'updates': inserts}
def submission(request, username=None):
    """OpenRosa submission endpoint: accept an XML submission + media.

    Handles optional digest authentication, HEAD pre-flight (per the
    OpenRosa spec), and POSTed ``xml_submission_file`` uploads; returns
    a 201 response with a Location header on success.
    """
    if username:
        formlist_user = get_object_or_404(User, username__iexact=username)
        profile, created = UserProfile.objects.get_or_create(
            user=formlist_user)
        # Enforce digest auth only when the form owner requires it.
        if profile.require_auth:
            authenticator = HttpDigestAuthenticator()
            if not authenticator.authenticate(request):
                return authenticator.build_challenge_response()
    if request.method == 'HEAD':
        # HEAD pre-flight: reply 204 with the submission URL so clients
        # can discover where to POST.
        response = OpenRosaResponse(status=204)
        if username:
            response['Location'] = request.build_absolute_uri().replace(
                request.get_full_path(), '/%s/submission' % username)
        else:
            response['Location'] = request.build_absolute_uri().replace(
                request.get_full_path(), '/submission')
        return response
    xml_file_list = []
    media_files = []
    # request.FILES is a django.utils.datastructures.MultiValueDict
    # for each key we have a list of values
    try:
        xml_file_list = request.FILES.pop("xml_submission_file", [])
        if len(xml_file_list) != 1:
            return OpenRosaResponseBadRequest(
                _(u"There should be a single XML submission file."))
        # save this XML file and media files as attachments
        media_files = request.FILES.values()
        # get uuid from post request
        uuid = request.POST.get('uuid')
        error, instance = safe_create_instance(username, xml_file_list[0],
                                               media_files, uuid, request)
        if error:
            return error
        elif instance is None:
            return OpenRosaResponseBadRequest(
                _(u"Unable to create submission."))
        audit = {"xform": instance.xform.id_string}
        audit_log(
            Actions.SUBMISSION_CREATED, request.user, instance.xform.user,
            _("Created submission on form %(id_string)s.") %
            {"id_string": instance.xform.id_string}, audit, request)
        # response as html if posting with a UUID
        if not username and uuid:
            response = _html_submission_response(request, instance)
        else:
            response = _submission_response(request, instance)
        # ODK needs two things for a form to be considered successful
        # 1) the status code needs to be 201 (created)
        # 2) The location header needs to be set to the host it posted to
        response.status_code = 201
        response['Location'] = request.build_absolute_uri(request.path)
        return response
    except IOError as e:
        if _bad_request(e):
            return OpenRosaResponseBadRequest(
                _(u"File transfer interruption."))
        else:
            raise
    finally:
        # Always close uploaded file handles, success or failure.
        if len(xml_file_list):
            [_file.close() for _file in xml_file_list]
        if len(media_files):
            [_file.close() for _file in media_files]
def submit_csv(username, xform, csv_file):
    """
    Imports CSV data to an existing form

    Takes a csv formatted file or string containing rows of
    submission/instance and converts those to xml submissions and finally
    submits them by calling
    :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (str or file): A CSV formatted file with submission rows.
    :return: If successful, a dict with import summary else dict with
        error str.
    :rtype: Dict
    """
    # NOTE(review): `unicode` / cStringIO are Python-2 only — this variant
    # predates the Python-3 port; confirm target runtime before reuse.
    if isinstance(csv_file, (str, unicode)):
        csv_file = cStringIO.StringIO(csv_file)
    elif csv_file is None or not hasattr(csv_file, 'read'):
        return {
            'error': ('Invalid param type for `csv_file`. '
                      'Expected file or String '
                      'got {} instead.'.format(type(csv_file).__name__))
        }
    csv_reader = ucsv.DictReader(csv_file)
    # uuids of instances created by this import, for rollback on error
    rollback_uuids = []
    submission_time = datetime.utcnow().isoformat()
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    error = None
    additions = inserts = 0
    for row in csv_reader:
        # fetch submission uuid before purging row metadata
        row_uuid = row.get('_uuid')
        submitted_by = row.get('_submitted_by')
        submission_date = row.get('_submission_time', submission_time)
        for key in row.keys():  # seems faster than a comprehension
            # remove metadata (keys starting with '_')
            if key.startswith('_'):
                del row[key]
            # process nested data e.g x[formhub/uuid] => x[formhub][uuid]
            if r'/' in key:
                p, c = key.split('/')
                row[p] = {c: row[key]}
                del row[key]
        # inject our form's uuid into the submission
        row.update(ona_uuid)
        old_meta = row.get('meta', {})
        new_meta, update = get_submission_meta_dict(xform, row_uuid)
        # `update` is truthy when this row updates an existing instance
        inserts += update
        old_meta.update(new_meta)
        row.update({'meta': old_meta})
        row_uuid = row.get('meta').get('instanceID')
        rollback_uuids.append(row_uuid.replace('uuid:', ''))
        xml_file = cStringIO.StringIO(
            dict2xmlsubmission(row, xform, row_uuid, submission_date))
        try:
            error, instance = safe_create_instance(username, xml_file, [],
                                                   xform.uuid, None)
        except ValueError as e:
            error = e
        if error:
            # roll back everything created so far for this import
            Instance.objects.filter(uuid__in=rollback_uuids,
                                    xform=xform).delete()
            return {'error': str(error)}
        else:
            additions += 1
            # Attribute the instance to the original submitter if known.
            users = User.objects.filter(
                username=submitted_by) if submitted_by else []
            if users:
                instance.user = users[0]
                instance.save()
    return {'additions': additions - inserts, 'updates': inserts}
def submit_csv(username, xform, csv_file):
    """
    Imports CSV data to an existing form

    Takes a csv formatted file or string containing rows of
    submission/instance and converts those to xml submissions and finally
    submits them by calling
    :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (str or file): A CSV formatted file with submission rows.
    :return: If successful, a dict with import summary else dict with
        error str.
    :rtype: Dict
    """
    # NOTE(review): `unicode` / cStringIO are Python-2 only — this variant
    # predates the Python-3 port; confirm target runtime before reuse.
    if isinstance(csv_file, unicode):
        csv_file = cStringIO.StringIO(csv_file)
    elif csv_file is None or not hasattr(csv_file, 'read'):
        return {'error': (u'Invalid param type for `csv_file`. '
                          'Expected utf-8 encoded file or unicode string '
                          'got {} instead.'.format(type(csv_file).__name__))}
    # Count data rows (header excluded) for progress reporting below.
    num_rows = sum(1 for row in csv_file) - 1
    csv_file.seek(0)
    csv_reader = ucsv.DictReader(csv_file)
    csv_header = csv_reader.fieldnames
    # check for spaces in headers
    if any(' ' in header for header in csv_header):
        return {'error': u'CSV file fieldnames should not contain spaces'}
    # Get the data dictionary
    dd = xform.data_dictionary()
    xform_header = dd.get_headers()
    # Columns the form expects but the CSV lacks, and vice versa.
    missing_col = set(xform_header).difference(csv_header)
    addition_col = set(csv_header).difference(xform_header)
    # change to list
    missing_col = list(missing_col)
    addition_col = list(addition_col)
    # remove all metadata columns
    missing = [col for col in missing_col if not col.startswith("_")]
    # remove all metadata inside groups
    missing = [col for col in missing if not ("/_" in col)]
    # ignore if is multiple select question
    for col in csv_header:
        # this col is a multiple select question
        survey_element = dd.get_survey_element(col)
        if survey_element and \
                survey_element.get('type') == MULTIPLE_SELECT_TYPE:
            # remove from the missing and additional list
            missing = [x for x in missing if not x.startswith(col)]
            addition_col.remove(col)
    if missing:
        return {'error': u"Sorry uploaded file does not match the form. "
                         u"The file is missing the column(s): "
                         u"{0}.".format(', '.join(missing))}
    # uuids of instances created by this import, for rollback on error
    rollback_uuids = []
    submission_time = datetime.utcnow().isoformat()
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    error = None
    additions = inserts = 0
    try:
        for row in csv_reader:
            # remove the additional columns
            for index in addition_col:
                del row[index]
            # fetch submission uuid before purging row metadata
            row_uuid = row.get('_uuid')
            submitted_by = row.get('_submitted_by')
            submission_date = row.get('_submission_time', submission_time)
            location_data = {}
            for key in row.keys():  # seems faster than a comprehension
                # remove metadata (keys starting with '_')
                if key.startswith('_'):
                    del row[key]
                # Collect row location data into separate location_data dict
                if key.endswith(('.latitude', '.longitude', '.altitude',
                                 '.precision')):
                    location_key, location_prop = key.rsplit(u'.', 1)
                    location_data.setdefault(location_key, {}).update(
                        {location_prop: row.get(key, '0')})
            # collect all location K-V pairs into single geopoint field(s)
            # in location_data dict
            for location_key in location_data.keys():
                location_data.update(
                    {location_key: (u'%(latitude)s %(longitude)s '
                                    '%(altitude)s %(precision)s') % defaultdict(
                        lambda: '', location_data.get(location_key))})
            row = dict_pathkeys_to_nested_dicts(row)
            location_data = dict_pathkeys_to_nested_dicts(location_data)
            row = dict_merge(row, location_data)
            # inject our form's uuid into the submission
            row.update(ona_uuid)
            old_meta = row.get('meta', {})
            new_meta, update = get_submission_meta_dict(xform, row_uuid)
            # `update` is truthy when this row updates an existing instance
            inserts += update
            old_meta.update(new_meta)
            row.update({'meta': old_meta})
            row_uuid = row.get('meta').get('instanceID')
            rollback_uuids.append(row_uuid.replace('uuid:', ''))
            xml_file = cStringIO.StringIO(
                dict2xmlsubmission(row, xform, row_uuid, submission_date))
            try:
                error, instance = safe_create_instance(username, xml_file,
                                                       [], xform.uuid, None)
            except ValueError as e:
                error = e
            if error:
                # roll back everything created so far for this import
                Instance.objects.filter(uuid__in=rollback_uuids,
                                        xform=xform).delete()
                return {'error': str(error)}
            else:
                additions += 1
                # Best-effort celery progress update; NOTE(review): the
                # bare except silently swallows all errors here.
                try:
                    current_task.update_state(state='PROGRESS',
                                              meta={'progress': additions,
                                                    'total': num_rows,
                                                    'info': addition_col})
                except:
                    pass
                # Attribute the instance to the original submitter.
                users = User.objects.filter(
                    username=submitted_by) if submitted_by else []
                if users:
                    instance.user = users[0]
                    instance.save()
    except UnicodeDecodeError:
        Instance.objects.filter(uuid__in=rollback_uuids,
                                xform=xform).delete()
        return {'error': u'CSV file must be utf-8 encoded'}
    except Exception as e:
        Instance.objects.filter(uuid__in=rollback_uuids,
                                xform=xform).delete()
        return {'error': str(e)}
    return {u"additions": additions - inserts, u"updates": inserts,
            u"info": u"Additional column(s) excluded from the upload: '{0}'."
            .format(', '.join(list(addition_col)))}
def submission(request, username=None):
    """OpenRosa submission endpoint: accept an XML submission + media.

    Handles optional digest authentication, HEAD pre-flight (per the
    OpenRosa spec), and POSTed ``xml_submission_file`` uploads; returns
    a 201 response with a Location header on success.
    """
    if username:
        formlist_user = get_object_or_404(User, username__iexact=username)
        profile, created = UserProfile.objects.get_or_create(
            user=formlist_user)
        # Enforce digest auth only when the form owner requires it.
        if profile.require_auth:
            authenticator = HttpDigestAuthenticator()
            if not authenticator.authenticate(request):
                return authenticator.build_challenge_response()
    if request.method == 'HEAD':
        # HEAD pre-flight: reply 204 with the submission URL so clients
        # can discover where to POST.
        response = OpenRosaResponse(status=204)
        if username:
            response['Location'] = request.build_absolute_uri().replace(
                request.get_full_path(), '/%s/submission' % username)
        else:
            response['Location'] = request.build_absolute_uri().replace(
                request.get_full_path(), '/submission')
        return response
    xml_file_list = []
    media_files = []
    # request.FILES is a django.utils.datastructures.MultiValueDict
    # for each key we have a list of values
    try:
        xml_file_list = request.FILES.pop("xml_submission_file", [])
        if len(xml_file_list) != 1:
            return OpenRosaResponseBadRequest(
                _(u"There should be a single XML submission file.")
            )
        # save this XML file and media files as attachments
        media_files = request.FILES.values()
        # get uuid from post request
        uuid = request.POST.get('uuid')
        error, instance = safe_create_instance(
            username, xml_file_list[0], media_files, uuid, request)
        if error:
            return error
        elif instance is None:
            return OpenRosaResponseBadRequest(
                _(u"Unable to create submission."))
        audit = {
            "xform": instance.xform.id_string
        }
        audit_log(
            Actions.SUBMISSION_CREATED, request.user, instance.xform.user,
            _("Created submission on form %(id_string)s.") % {
                "id_string": instance.xform.id_string
            }, audit, request)
        # response as html if posting with a UUID
        if not username and uuid:
            response = _html_submission_response(request, instance)
        else:
            response = _submission_response(request, instance)
        # ODK needs two things for a form to be considered successful
        # 1) the status code needs to be 201 (created)
        # 2) The location header needs to be set to the host it posted to
        response.status_code = 201
        response['Location'] = request.build_absolute_uri(request.path)
        return response
    except IOError as e:
        if _bad_request(e):
            return OpenRosaResponseBadRequest(
                _(u"File transfer interruption."))
        else:
            raise
    finally:
        # Always close uploaded file handles, success or failure.
        if len(xml_file_list):
            [_file.close() for _file in xml_file_list]
        if len(media_files):
            [_file.close() for _file in media_files]
def submit_csv(username, xform, csv_file):
    """
    Imports CSV data to an existing form

    Takes a csv formatted file or string containing rows of
    submission/instance and converts those to xml submissions and finally
    submits them by calling
    :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (str or file): A CSV formatted file with submission rows.
    :return: If successful, a dict with import summary else dict with
        error str.
    :rtype: Dict
    """
    # NOTE(review): `unicode` / cStringIO are Python-2 only — this variant
    # predates the Python-3 port; confirm target runtime before reuse.
    if isinstance(csv_file, unicode):
        csv_file = cStringIO.StringIO(csv_file)
    elif csv_file is None or not hasattr(csv_file, 'read'):
        return {'error': (u'Invalid param type for `csv_file`. '
                          'Expected utf-8 encoded file or unicode string '
                          'got {} instead.'.format(type(csv_file).__name__))}
    csv_file.seek(0)
    csv_reader = ucsv.DictReader(csv_file)
    # check for spaces in headers
    if any(' ' in header for header in csv_reader.fieldnames):
        return {'error': u'CSV file fieldnames should not contain spaces'}
    # uuids of instances created by this import, for rollback on error
    rollback_uuids = []
    submission_time = datetime.utcnow().isoformat()
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    error = None
    additions = inserts = 0
    try:
        for row in csv_reader:
            # fetch submission uuid before purging row metadata
            row_uuid = row.get('_uuid')
            submitted_by = row.get('_submitted_by')
            submission_date = row.get('_submission_time', submission_time)
            location_data = {}
            for key in row.keys():  # seems faster than a comprehension
                # remove metadata (keys starting with '_')
                if key.startswith('_'):
                    del row[key]
                # Collect row location data into separate location_data dict
                if key.endswith(('.latitude', '.longitude', '.altitude',
                                 '.precision')):
                    location_key, location_prop = key.rsplit(u'.', 1)
                    location_data.setdefault(location_key, {}).update(
                        {location_prop: row.get(key, '0')})
            # collect all location K-V pairs into single geopoint field(s)
            # in location_data dict
            for location_key in location_data.keys():
                location_data.update(
                    {location_key: (u'%(latitude)s %(longitude)s '
                                    '%(altitude)s %(precision)s') % defaultdict(
                        lambda: '', location_data.get(location_key))})
            row = dict_pathkeys_to_nested_dicts(row)
            location_data = dict_pathkeys_to_nested_dicts(location_data)
            row = dict_merge(row, location_data)
            # inject our form's uuid into the submission
            row.update(ona_uuid)
            old_meta = row.get('meta', {})
            new_meta, update = get_submission_meta_dict(xform, row_uuid)
            # `update` is truthy when this row updates an existing instance
            inserts += update
            old_meta.update(new_meta)
            row.update({'meta': old_meta})
            row_uuid = row.get('meta').get('instanceID')
            rollback_uuids.append(row_uuid.replace('uuid:', ''))
            xml_file = cStringIO.StringIO(
                dict2xmlsubmission(row, xform, row_uuid, submission_date))
            try:
                error, instance = safe_create_instance(username, xml_file,
                                                       [], xform.uuid, None)
            except ValueError as e:
                error = e
            if error:
                # roll back everything created so far for this import
                Instance.objects.filter(uuid__in=rollback_uuids,
                                        xform=xform).delete()
                return {'error': str(error)}
            else:
                additions += 1
                # Attribute the instance to the original submitter.
                users = User.objects.filter(
                    username=submitted_by) if submitted_by else []
                if users:
                    instance.user = users[0]
                    instance.save()
    except UnicodeDecodeError:
        Instance.objects.filter(uuid__in=rollback_uuids,
                                xform=xform).delete()
        return {'error': u'CSV file must be utf-8 encoded'}
    except Exception as e:
        Instance.objects.filter(uuid__in=rollback_uuids,
                                xform=xform).delete()
        return {'error': str(e)}
    return {'additions': additions - inserts, 'updates': inserts}
def submit_csv(username, xform, csv_file):
    """Imports CSV data to an existing form.

    Takes a csv formatted file or string containing rows of
    submission/instance data, converts each row to an XML submission and
    submits it by calling
    :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`.

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (unicode or file) csv_file: A CSV formatted file with submission
        rows.
    :return: If successful, a dict with import summary else dict with error
        str.
    :rtype: Dict
    """
    # Accept either a unicode CSV payload or a readable file-like object.
    # (Python 2 code: `unicode` and `cStringIO` are Py2-only names.)
    if isinstance(csv_file, unicode):
        csv_file = cStringIO.StringIO(csv_file)
    elif csv_file is None or not hasattr(csv_file, 'read'):
        return {
            'error': (u'Invalid param type for `csv_file`. '
                      'Expected utf-8 encoded file or unicode string '
                      'got {} instead.'.format(type(csv_file).__name__))
        }

    # Count data rows (total lines minus header) for progress reporting.
    num_rows = sum(1 for row in csv_file) - 1
    csv_file.seek(0)
    csv_reader = ucsv.DictReader(csv_file)
    # check for spaces in headers
    if any(' ' in header for header in csv_reader.fieldnames):
        return {'error': u'CSV file fieldnames should not contain spaces'}

    # uuids of instances created so far, deleted if any later row fails.
    rollback_uuids = []
    submission_time = datetime.utcnow().isoformat()
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    error = None
    # `additions` counts every successful submission; `inserts` the subset
    # that updated an existing instance.
    additions = inserts = 0
    try:
        for row in csv_reader:
            # fetch submission uuid before purging row metadata
            row_uuid = row.get('_uuid')
            submitted_by = row.get('_submitted_by')
            submission_date = row.get('_submission_time', submission_time)

            location_data = {}
            for key in row.keys():  # seems faster than a comprehension
                # remove metadata (keys starting with '_')
                if key.startswith('_'):
                    del row[key]
                # Collect row location data into separate location_data dict
                if key.endswith(
                        ('.latitude', '.longitude', '.altitude',
                         '.precision')):
                    location_key, location_prop = key.rsplit(u'.', 1)
                    location_data.setdefault(location_key, {}).update(
                        {location_prop: row.get(key, '0')})

            # collect all location K-V pairs into single geopoint field(s)
            # in location_data dict; defaultdict supplies '' for missing
            # components.
            for location_key in location_data.keys():
                location_data.update({
                    location_key:
                    (u'%(latitude)s %(longitude)s '
                     '%(altitude)s %(precision)s') % defaultdict(
                         lambda: '', location_data.get(location_key))
                })

            # Expand 'a/b' style keys into nested dicts, then merge the
            # geopoint data back into the row.
            row = dict_pathkeys_to_nested_dicts(row)
            location_data = dict_pathkeys_to_nested_dicts(location_data)

            row = dict_merge(row, location_data)

            # inject our form's uuid into the submission
            row.update(ona_uuid)

            old_meta = row.get('meta', {})
            new_meta, update = get_submission_meta_dict(xform, row_uuid)
            inserts += update
            old_meta.update(new_meta)
            row.update({'meta': old_meta})

            row_uuid = row.get('meta').get('instanceID')
            rollback_uuids.append(row_uuid.replace('uuid:', ''))

            xml_file = cStringIO.StringIO(
                dict2xmlsubmission(row, xform, row_uuid, submission_date))

            try:
                error, instance = safe_create_instance(username, xml_file, [],
                                                       xform.uuid, None)
            except ValueError as e:
                error = e

            if error:
                # Any failure aborts the whole import and deletes every
                # instance created so far.
                Instance.objects.filter(uuid__in=rollback_uuids,
                                        xform=xform).delete()
                return {'error': str(error)}
            else:
                additions += 1
                # Progress reporting is best-effort (fails when not running
                # inside a Celery task). FIX: was a bare `except:`, which
                # also swallowed SystemExit/KeyboardInterrupt — narrowed to
                # Exception while keeping the deliberate silent fallback.
                try:
                    current_task.update_state(state='PROGRESS',
                                              meta={
                                                  'progress': additions,
                                                  'total': num_rows
                                              })
                except Exception:
                    pass
                # Attribute the instance to the '_submitted_by' user when
                # that username exists.
                users = User.objects.filter(
                    username=submitted_by) if submitted_by else []
                if users:
                    instance.user = users[0]
                    instance.save()

    except UnicodeDecodeError:
        Instance.objects.filter(uuid__in=rollback_uuids, xform=xform).delete()
        return {'error': u'CSV file must be utf-8 encoded'}
    except Exception as e:
        Instance.objects.filter(uuid__in=rollback_uuids, xform=xform).delete()
        return {'error': str(e)}

    return {'additions': additions - inserts, 'updates': inserts}
def submit_csv(username, xform, csv_file, overwrite=False):
    """Imports CSV data to an existing form.

    Takes a csv formatted file or string containing rows of
    submission/instance data, converts each row to an XML submission and
    submits it by calling
    :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`.

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (str or file) csv_file: A CSV formatted file with submission rows.
    :param bool overwrite: When True, soft-delete all the form's existing
        submissions before importing.
    :return: If sucessful, a dict with import summary else dict with error
        str.
    :rtype: Dict
    """
    # Validate the file/headers first; abort with a failed async status if
    # the CSV does not match the form.
    csv_file_validation_summary = validate_csv_file(csv_file, xform)

    if csv_file_validation_summary.get('valid'):
        additional_col = csv_file_validation_summary.get('additional_col')
    else:
        return async_status(
            FAILED,
            csv_file_validation_summary.get('error_msg')
        )

    # Count data rows (total lines minus header) for progress reporting.
    num_rows = sum(1 for row in csv_file) - 1

    # Change stream position to start of file
    csv_file.seek(0)
    csv_reader = ucsv.DictReader(csv_file, encoding='utf-8-sig')
    xform_json = json.loads(xform.json)
    # Names of multiple-select questions; used to split their values back
    # out when nesting/flattening rows.
    select_multiples = [
        qstn.name for qstn in
        xform.get_survey_elements_of_type(MULTIPLE_SELECT_TYPE)]
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    additions = duplicates = inserts = 0
    # uuids of instances created so far, deleted on failure/validation error.
    rollback_uuids = []
    # Maps row number -> validation error message; import results are rolled
    # back at the end if non-empty.
    errors = {}

    # Retrieve the columns we should validate values for
    # Currently validating date, datetime, integer and decimal columns
    col_to_validate = {
        'date': (get_columns_by_type(XLS_DATE_FIELDS, xform_json), parse),
        'datetime': (
            get_columns_by_type(XLS_DATETIME_FIELDS, xform_json), parse),
        'integer': (get_columns_by_type(['integer'], xform_json), int),
        'decimal': (get_columns_by_type(['decimal'], xform_json), float)
    }

    if overwrite:
        # Soft-delete all existing submissions and notify listeners.
        instance_ids = [i['id'] for i in xform.instances.values('id')]
        xform.instances.filter(deleted_at__isnull=True)\
            .update(deleted_at=timezone.now(),
                    deleted_by=User.objects.get(username=username))
        # send message
        send_message(
            instance_id=instance_ids, target_id=xform.id,
            target_type=XFORM, user=User.objects.get(username=username),
            message_verb=SUBMISSION_DELETED)

    try:
        for row_no, row in enumerate(csv_reader):
            # Remove additional columns
            for index in additional_col:
                del row[index]

            # Remove 'n/a' and '' values from csv
            row = {k: v for (k, v) in row.items() if v not in [NA_REP, '']}

            row, error = validate_row(row, col_to_validate)

            if error:
                errors[row_no] = error

            # Only continue the process if no errors where encountered while
            # validating the data
            if not errors:
                location_data = {}

                for key in list(row):
                    # Collect row location data into separate location_data
                    # dict
                    if key.endswith(('.latitude', '.longitude', '.altitude',
                                     '.precision')):
                        location_key, location_prop = key.rsplit(u'.', 1)
                        location_data.setdefault(location_key, {}).update({
                            location_prop: row.get(key, '0')
                        })

                # collect all location K-V pairs into single geopoint field(s)
                # in location_data dict; defaultdict supplies '' for missing
                # components.
                for location_key in list(location_data):
                    location_data.update({
                        location_key:
                        (u'%(latitude)s %(longitude)s '
                         '%(altitude)s %(precision)s') % defaultdict(
                             lambda: '', location_data.get(location_key))
                    })

                nested_dict = csv_dict_to_nested_dict(
                    row, select_multiples=select_multiples)
                row = flatten_split_select_multiples(
                    nested_dict, select_multiples=select_multiples)
                location_data = csv_dict_to_nested_dict(location_data)
                # Merge location_data into the Row data
                row = dict_merge(row, location_data)

                submission_time = datetime.utcnow().isoformat()
                # Prefer the explicit meta/instanceID column; otherwise build
                # one from the UUID column when present.
                row_uuid = row.get('meta/instanceID') or 'uuid:{}'.format(
                    row.get(UUID)) if row.get(UUID) else None
                submitted_by = row.get('_submitted_by')
                submission_date = row.get('_submission_time', submission_time)

                for key in list(row):
                    # remove metadata (keys starting with '_')
                    if key.startswith('_'):
                        del row[key]

                # Inject our forms uuid into the submission
                row.update(ona_uuid)

                old_meta = row.get('meta', {})
                new_meta, update = get_submission_meta_dict(xform, row_uuid)
                inserts += update
                old_meta.update(new_meta)
                row.update({'meta': old_meta})

                row_uuid = row.get('meta').get('instanceID')
                rollback_uuids.append(row_uuid.replace('uuid:', ''))

                try:
                    xml_file = BytesIO(
                        dict2xmlsubmission(
                            row, xform, row_uuid, submission_date))

                    try:
                        error, instance = safe_create_instance(
                            username, xml_file, [], xform.uuid, None)
                    except ValueError as e:
                        error = e

                    if error:
                        # A 202 OpenRosaResponse marks a duplicate
                        # submission and is tolerated; any other error
                        # aborts and rolls back everything created so far.
                        if not (isinstance(error, OpenRosaResponse)
                                and error.status_code == 202):
                            Instance.objects.filter(
                                uuid__in=rollback_uuids,
                                xform=xform).delete()
                            return async_status(FAILED, text(error))
                        else:
                            duplicates += 1
                    else:
                        additions += 1

                        # Report progress to Celery every
                        # PROGRESS_BATCH_UPDATE successful rows
                        # (best-effort).
                        if additions % PROGRESS_BATCH_UPDATE == 0:
                            try:
                                current_task.update_state(
                                    state='PROGRESS',
                                    meta={
                                        'progress': additions,
                                        'total': num_rows,
                                        'info': additional_col
                                    })
                            except Exception:
                                logging.exception(
                                    _(u'Could not update state of '
                                      'import CSV batch process.'))
                            finally:
                                xform.submission_count(True)

                        # Attribute the instance to the '_submitted_by'
                        # user when that username exists.
                        users = User.objects.filter(
                            username=submitted_by) if submitted_by else []
                        if users:
                            instance.user = users[0]
                            instance.save()
                except Exception as e:
                    return failed_import(rollback_uuids, xform, e, text(e))
    except UnicodeDecodeError as e:
        return failed_import(rollback_uuids, xform, e,
                             'CSV file must be utf-8 encoded')

    if errors:
        # Rollback all created instances if an error occurred during
        # validation
        Instance.objects.filter(
            uuid__in=rollback_uuids, xform=xform).delete()
        xform.submission_count(True)
        return async_status(
            FAILED,
            u'Invalid CSV data imported in row(s): {}'.format(
                errors) if errors else ''
        )
    else:
        added_submissions = additions - inserts
        # Emit analytics events for created and updated submissions.
        event_by = User.objects.get(username=username)
        event_name = None
        tracking_properties = {
            'xform_id': xform.pk,
            'project_id': xform.project.pk,
            'submitted_by': event_by,
            'label': f'csv-import-for-form-{xform.pk}',
            'from': 'CSV Import',
        }
        if added_submissions > 0:
            tracking_properties['value'] = added_submissions
            event_name = INSTANCE_CREATE_EVENT
            analytics.track(
                event_by, event_name, properties=tracking_properties)

        if inserts > 0:
            tracking_properties['value'] = inserts
            event_name = INSTANCE_UPDATE_EVENT
            analytics.track(
                event_by, event_name, properties=tracking_properties)

        return {
            'additions': added_submissions,
            'duplicates': duplicates,
            'updates': inserts,
            'info': "Additional column(s) excluded from the upload: '{0}'."
            .format(', '.join(list(additional_col)))}
def submit_csv(username, xform, csv_file, overwrite=False):
    """Imports CSV data to an existing form.

    Takes a csv formatted file or string containing rows of
    submission/instance data, converts each row to an XML submission and
    submits it by calling
    :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`.

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (str or file) csv_file: A CSV formatted file with submission rows.
    :param bool overwrite: When True, soft-delete all the form's existing
        submissions before importing.
    :return: If sucessful, a dict with import summary else dict with error
        str.
    :rtype: Dict
    """
    if isinstance(csv_file, str):
        # FIX: BytesIO requires bytes — BytesIO(str) raises TypeError on
        # Python 3, so the documented "unicode string" input always crashed.
        csv_file = BytesIO(csv_file.encode('utf-8'))
    elif csv_file is None or not hasattr(csv_file, 'read'):
        return async_status(
            FAILED,
            (u'Invalid param type for `csv_file`. '
             'Expected utf-8 encoded file or unicode'
             ' string got {} instead.'.format(type(csv_file).__name__)))

    # Count data rows (total lines minus header) for progress reporting.
    num_rows = sum(1 for row in csv_file) - 1
    csv_file.seek(0)
    csv_reader = ucsv.DictReader(csv_file, encoding='utf-8-sig')
    csv_header = csv_reader.fieldnames

    # check for spaces in headers
    if any(' ' in header for header in csv_header):
        return async_status(FAILED,
                            u'CSV file fieldnames should not contain spaces')

    # Compare the CSV header against the form's expected header to find
    # missing and extra columns.
    xform_header = xform.get_headers()
    missing_col = set(xform_header).difference(csv_header)
    addition_col = set(csv_header).difference(xform_header)

    # change to list
    missing_col = list(missing_col)
    addition_col = list(addition_col)
    # remove all metadata columns
    missing = [
        col for col in missing_col
        if not col.startswith("_") and col not in IGNORED_COLUMNS
    ]

    # remove all metadata inside groups
    missing = [col for col in missing if '/_' not in col]

    # ignore if is multiple select question
    for col in csv_header:
        # this col is a multiple select question
        survey_element = xform.get_survey_element(col)
        if survey_element and \
                survey_element.get('type') == MULTIPLE_SELECT_TYPE:
            # remove from the missing and additional list
            missing = [x for x in missing if not x.startswith(col)]
            addition_col.remove(col)

    # remove headers for repeats that might be missing from csv
    missing = sorted([m for m in missing if m.find('[') == -1])

    # Include additional repeats
    addition_col = [a for a in addition_col if a.find('[') == -1]

    if missing:
        return async_status(
            FAILED,
            u"Sorry uploaded file does not match the form. "
            u"The file is missing the column(s): "
            u"{0}.".format(', '.join(missing)))

    if overwrite:
        # Soft-delete all existing submissions before the import.
        xform.instances.filter(deleted_at__isnull=True)\
            .update(deleted_at=timezone.now(),
                    deleted_by=User.objects.get(username=username))

    # uuids of instances created so far, deleted if a later row fails.
    rollback_uuids = []
    submission_time = datetime.utcnow().isoformat()
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    error = None
    additions = duplicates = inserts = 0
    try:
        for row in csv_reader:
            # remove the additional columns
            for index in addition_col:
                del row[index]

            # fetch submission uuid before purging row metadata
            row_uuid = row.get('meta/instanceID') or row.get('_uuid')
            submitted_by = row.get('_submitted_by')
            submission_date = row.get('_submission_time', submission_time)

            location_data = {}
            for key in list(row):  # seems faster than a comprehension
                # remove metadata (keys starting with '_')
                if key.startswith('_'):
                    del row[key]
                # Collect row location data into separate location_data dict
                if key.endswith(
                        ('.latitude', '.longitude', '.altitude',
                         '.precision')):
                    location_key, location_prop = key.rsplit(u'.', 1)
                    location_data.setdefault(location_key, {}).update(
                        {location_prop: row.get(key, '0')})
                # remove 'n/a' values
                if not key.startswith('_') and row[key] == 'n/a':
                    del row[key]

            # collect all location K-V pairs into single geopoint field(s)
            # in location_data dict; defaultdict supplies '' for missing
            # components.
            for location_key in list(location_data):
                location_data.update({
                    location_key:
                    (u'%(latitude)s %(longitude)s '
                     '%(altitude)s %(precision)s') % defaultdict(
                         lambda: '', location_data.get(location_key))
                })

            row = csv_dict_to_nested_dict(row)
            location_data = csv_dict_to_nested_dict(location_data)

            row = dict_merge(row, location_data)

            # inject our form's uuid into the submission
            row.update(ona_uuid)

            old_meta = row.get('meta', {})
            new_meta, update = get_submission_meta_dict(xform, row_uuid)
            inserts += update
            old_meta.update(new_meta)
            row.update({'meta': old_meta})

            row_uuid = row.get('meta').get('instanceID')
            rollback_uuids.append(row_uuid.replace('uuid:', ''))

            xml_file = BytesIO(
                dict2xmlsubmission(row, xform, row_uuid, submission_date))

            try:
                error, instance = safe_create_instance(username, xml_file, [],
                                                       xform.uuid, None)
            except ValueError as e:
                error = e

            if error:
                # A 202 OpenRosaResponse marks a duplicate submission and is
                # tolerated; any other error aborts and rolls back.
                if not (isinstance(error, OpenRosaResponse)
                        and error.status_code == 202):
                    Instance.objects.filter(uuid__in=rollback_uuids,
                                            xform=xform).delete()
                    return async_status(FAILED, text(error))
                else:
                    duplicates += 1
            else:
                additions += 1
                # Report progress to Celery every PROGRESS_BATCH_UPDATE
                # successful rows (best-effort). FIX: removed a leftover
                # debug `print(current_task)` statement.
                if additions % PROGRESS_BATCH_UPDATE == 0:
                    try:
                        current_task.update_state(state='PROGRESS',
                                                  meta={
                                                      'progress': additions,
                                                      'total': num_rows,
                                                      'info': addition_col
                                                  })
                    except Exception:
                        logging.exception(
                            _(u'Could not update state of '
                              'import CSV batch process.'))
                    finally:
                        xform.submission_count(True)

                # Attribute the instance to the '_submitted_by' user when
                # that username exists.
                users = User.objects.filter(
                    username=submitted_by) if submitted_by else []
                if users:
                    instance.user = users[0]
                    instance.save()
    except UnicodeDecodeError as e:
        return failed_import(rollback_uuids, xform, e,
                             u'CSV file must be utf-8 encoded')
    except Exception as e:
        return failed_import(rollback_uuids, xform, e, text(e))
    finally:
        xform.submission_count(True)

    return {
        "additions": additions - inserts,
        "duplicates": duplicates,
        u"updates": inserts,
        u"info": u"Additional column(s) excluded from the upload: '{0}'."
        .format(', '.join(list(addition_col)))
    }  # yapf: disable
def submit_csv(username, xform, csv_file):
    """Imports CSV data to an existing form.

    Takes a csv formatted file or string containing rows of
    submission/instance data, converts each row to an XML submission and
    submits it by calling
    :py:func:`onadata.libs.utils.logger_tools.safe_create_instance`.

    :param str username: the submission user
    :param onadata.apps.logger.models.XForm xform: The submission's XForm.
    :param (str or file) csv_file: A CSV formatted file with submission rows.
    :return: If successful, a dict with import summary else dict with error
        str.
    :rtype: Dict
    """
    # Accept a CSV payload string or a readable file-like object.
    # (Python 2 code: `unicode` and `cStringIO` are Py2-only names.)
    if isinstance(csv_file, (str, unicode)):
        csv_file = cStringIO.StringIO(csv_file)
    elif csv_file is None or not hasattr(csv_file, 'read'):
        return {'error': (u'Invalid param type for `csv_file`. '
                          'Expected file or String '
                          'got {} instead.'.format(type(csv_file).__name__))}

    csv_reader = ucsv.DictReader(csv_file)
    # uuids of instances created so far, deleted if a later row fails.
    rollback_uuids = []
    submission_time = datetime.utcnow().isoformat()
    ona_uuid = {'formhub': {'uuid': xform.uuid}}
    error = None
    # `additions` counts every successful submission; `inserts` the subset
    # that updated an existing instance.
    additions = inserts = 0
    for row in csv_reader:
        # fetch submission uuid before purging row metadata
        row_uuid = row.get('_uuid')
        submitted_by = row.get('_submitted_by')
        submission_date = row.get('_submission_time', submission_time)

        for key in row.keys():  # seems faster than a comprehension
            # remove metadata (keys starting with '_')
            if key.startswith('_'):
                del row[key]
            # process nested data e.g x[formhub/uuid] => x[formhub][uuid]
            if r'/' in key:
                # FIX: the original did `row[p] = {c: row[key]}`, which
                # clobbered previously collected children sharing the same
                # parent (only the last 'p/...' column survived), and
                # `key.split('/')` raised ValueError for paths with more
                # than one slash. Merge into an existing dict instead and
                # bound the split; deeper paths keep their remainder as a
                # single child key.
                p, c = key.split('/', 1)
                parent = row.setdefault(p, {})
                if not isinstance(parent, dict):
                    # A plain column with the parent's name existed; keep
                    # the original clobbering behavior for that case.
                    parent = row[p] = {}
                parent[c] = row[key]
                del row[key]

        # inject our form's uuid into the submission
        row.update(ona_uuid)

        old_meta = row.get('meta', {})
        new_meta, update = get_submission_meta_dict(xform, row_uuid)
        inserts += update
        old_meta.update(new_meta)
        row.update({'meta': old_meta})

        row_uuid = row.get('meta').get('instanceID')
        rollback_uuids.append(row_uuid.replace('uuid:', ''))

        xml_file = cStringIO.StringIO(dict2xmlsubmission(row, xform, row_uuid,
                                                         submission_date))

        try:
            error, instance = safe_create_instance(username, xml_file, [],
                                                   xform.uuid, None)
        except ValueError as e:
            error = e

        if error:
            # Any failure aborts the whole import and deletes every
            # instance created so far.
            Instance.objects.filter(uuid__in=rollback_uuids,
                                    xform=xform).delete()
            return {'error': str(error)}
        else:
            additions += 1
            # Attribute the instance to the '_submitted_by' user when that
            # username exists.
            users = User.objects.filter(
                username=submitted_by) if submitted_by else []
            if users:
                instance.user = users[0]
                instance.save()

    return {'additions': additions - inserts, 'updates': inserts}