Exemple #1
0
def delete_data(request, username=None, id_string=None):
    """Mark one submission as deleted and report the outcome as JSON.

    The submission id is taken from the ``id`` POST field. When the request
    carries a non-empty ``callback`` query parameter, the JSON payload is
    wrapped in a call to that name (JSONP style).

    :param request: the incoming HTTP request.
    :param username: forwarded to ``check_and_set_user_and_form``.
    :param id_string: forwarded to ``check_and_set_user_and_form``.
    :returns: ``HttpResponseForbidden`` when the lookup yields no form,
        ``HttpResponseBadRequest`` when ``id`` is missing or empty, otherwise
        an ``HttpResponse`` carrying the JSON (or JSONP) body.
    """
    xform, owner = check_and_set_user_and_form(username, id_string, request)
    if not xform:
        return HttpResponseForbidden(_(u"Not shared."))

    data_id = request.POST.get("id")
    if not data_id:
        return HttpResponseBadRequest(_(u"id must be specified"))

    # NOTE(review): nothing visible here ties ``data_id`` to ``xform``;
    # confirm that ``Instance.set_deleted_at`` or the caller enforces it.
    Instance.set_deleted_at(data_id)
    audit = {"xform": xform.id_string}
    audit_log(
        Actions.SUBMISSION_DELETED,
        request.user,
        owner,
        _("Deleted submission with id '%(record_id)s' " "on '%(id_string)s'.")
        % {"id_string": xform.id_string, "record_id": data_id},
        audit,
        request,
    )
    response_text = json.dumps({"success": "Deleted data %s" % data_id})
    callback = request.GET.get("callback")
    if callback:
        # NOTE(review): the callback name is echoed back without validation.
        response_text = "%s(%s)" % (callback, response_text)
    # ``content_type`` is used because Django 1.7 removed the ``mimetype``
    # keyword; ``content_type`` is accepted by older releases as well.
    return HttpResponse(response_text, content_type="application/json")
Exemple #2
0
def delete_data(request, username=None, id_string=None):
    """Flag a single submission as deleted and answer with a JSON status.

    The id of the submission comes from the ``id`` POST field. A non-empty
    ``callback`` query parameter turns the answer into a JSONP-style call
    of that name around the JSON payload.

    :param request: the incoming HTTP request.
    :param username: handed to ``check_and_set_user_and_form``.
    :param id_string: handed to ``check_and_set_user_and_form``.
    :returns: ``HttpResponseForbidden`` if no form comes back from the
        lookup, ``HttpResponseBadRequest`` if ``id`` is absent or empty,
        else an ``HttpResponse`` with the JSON (or JSONP) text.
    """
    xform, owner = check_and_set_user_and_form(username, id_string, request)
    if not xform:
        return HttpResponseForbidden(_(u'Not shared.'))

    data_id = request.POST.get('id')
    if not data_id:
        return HttpResponseBadRequest(_(u"id must be specified"))

    # NOTE(review): the id is not checked against ``xform`` in this view;
    # verify that the ownership check happens elsewhere.
    Instance.set_deleted_at(data_id)
    audit = {
        'xform': xform.id_string
    }
    audit_log(
        Actions.SUBMISSION_DELETED, request.user, owner,
        _("Deleted submission with id '%(record_id)s' "
            "on '%(id_string)s'.") %
        {
            'id_string': xform.id_string,
            'record_id': data_id
        }, audit, request)
    response_text = json.dumps({"success": "Deleted data %s" % data_id})
    callback = request.GET.get('callback')
    if callback:
        # NOTE(review): the callback value is reflected unvalidated.
        response_text = "%s(%s)" % (callback, response_text)
    # The ``mimetype`` keyword was removed in Django 1.7; ``content_type``
    # works on releases before and after that change.
    return HttpResponse(response_text, content_type='application/json')
Exemple #3
0
 def test_delete_instance_metadata_without_perms(self):
     """
     A DELETE request on metadata attached to an instance must not be
     granted object permission.
     """
     requester = User(username="******")
     submission = Instance()
     submission.xform = XForm()
     delete_request = MagicMock(user=requester, method='DELETE')
     metadata = MagicMock(content_object=submission)
     granted = self.permissions.has_object_permission(
         delete_request, self.view, metadata)
     self.assertFalse(granted)
Exemple #4
0
    def test_kml_export_data(self):
        """
        Check kml_export_data(id_string, user) for a merged dataset built
        from two forms that each hold one geopoint submission.
        """
        kml_md = """
        | survey |
        |        | type              | name  | label |
        |        | geopoint          | gps   | GPS   |
        |        | select one fruits | fruit | Fruit |

        | choices |
        |         | list name | name   | label  |
        |         | fruits    | orange | Orange |
        |         | fruits    | mango  | Mango  |
        """
        # Publish both parent forms first, then add one submission to each.
        form_a = self._publish_markdown(kml_md, self.user, id_string='a')
        form_b = self._publish_markdown(kml_md, self.user, id_string='b')
        Instance(
            xform=form_a,
            xml='<data id="a"><gps>-1.28 36.83</gps><fruit>orange</fruit></data>'
        ).save()
        Instance(
            xform=form_b,
            xml='<data id="b"><gps>32.85 13.04</gps><fruit>mango</fruit></data>'
        ).save()

        # Create the merged dataset through its serializer.
        payload = {
            'xforms': [
                "http://testserver/api/v1/forms/%s" % form_a.pk,
                "http://testserver/api/v1/forms/%s" % form_b.pk,
            ],
            'name': 'Merged Dataset',
            'project':
            "http://testserver/api/v1/projects/%s" % form_a.project.pk,
        }  # yapf: disable
        post_request = self.factory.post('/')
        post_request.user = self.user
        serializer = MergedXFormSerializer(
            data=payload, context={'request': post_request})
        self.assertTrue(serializer.is_valid())
        serializer.save()
        merged = XForm.objects.filter(
            pk__gt=form_b.pk, is_merged_dataset=True).first()

        table_a = u'<table border="1"><a href="#"><img width="210" class="thumbnail" src="" alt=""></a><tr><td>GPS</td><td>-1.28 36.83</td></tr><tr><td>Fruit</td><td>orange</td></tr></table>'  # noqa pylint: disable=C0301
        table_b = u'<table border="1"><a href="#"><img width="210" class="thumbnail" src="" alt=""></a><tr><td>GPS</td><td>32.85 13.04</td></tr><tr><td>Fruit</td><td>mango</td></tr></table>'  # noqa pylint: disable=C0301
        point_a = {
            'name': u'a',
            'image_urls': [],
            'lat': -1.28,
            'table': table_a,
            'lng': 36.83,
            'id': form_a.instances.all().first().pk,
        }
        point_b = {
            'name': u'b',
            'image_urls': [],
            'lat': 32.85,
            'table': table_b,
            'lng': 13.04,
            'id': form_b.instances.all().first().pk,
        }
        exported = kml_export_data(merged.id_string, merged.user)
        self.assertEqual(exported, [point_a, point_b])
def _make_submissions_merged_datasets(merged_xform):
    """Save one submission to each of the first two forms of ``merged_xform``.

    The form at position 0 receives the "orange" XML and the form at
    position 1 the "mango" XML, in that order.
    """
    submissions = (
        (0, '<data id="a"><fruit>orange</fruit></data>'),  # form a
        (1, '<data id="b"><fruit>mango</fruit></data>'),  # form b
    )
    for position, payload in submissions:
        parent_form = merged_xform.xforms.all()[position]
        Instance(xform=parent_form, xml=payload).save()
    def test_md_data_viewset_deleted_form(self):
        """List merged-dataset data while parent forms gain submissions and
        one parent form is soft-deleted."""
        dataset = self._create_merged_dataset()
        dataset_pk = dataset['id']
        merged_xform = MergedXForm.objects.get(pk=dataset_pk)
        request = self.factory.get('/', **self.extra)
        list_view = DataViewSet.as_view({'get': 'list'})

        def fetch_and_check(expected_fruit):
            # DataViewSet /data/[pk] endpoint: expect a 200 and exactly the
            # given fruit values, in order.
            listing = list_view(request, pk=dataset_pk)
            self.assertEqual(listing.status_code, 200)
            self.assertEqual(len(listing.data), len(expected_fruit))
            self.assertEqual(
                [row['fruit'] for row in listing.data], expected_fruit)
            return listing

        # One submission to form a.
        form_a = merged_xform.xforms.all()[0]
        Instance(
            xform=form_a,
            xml='<data id="a"><fruit>orange</fruit></data>').save()
        fetch_and_check(['orange'])

        # One submission to form b.
        form_b = merged_xform.xforms.all()[1]
        Instance(
            xform=form_b,
            xml='<data id="b"><fruit>mango</fruit></data>').save()
        both = fetch_and_check(['orange', 'mango'])
        dataid = both.data[0]['_id']

        # With form a soft-deleted only the form b row is listed.
        form_a.soft_delete()
        fetch_and_check(['mango'])

        # DataViewSet /data/[pk]/[dataid] endpoint, form a deleted.
        retrieve_view = DataViewSet.as_view({'get': 'retrieve'})
        detail = retrieve_view(request, pk=dataset_pk, dataid=dataid)
        self.assertEqual(detail.status_code, 404)
Exemple #7
0
 def test_delete_instance_metadata_without_perms(self, has_perms_mock):
     """
     With the injected permission mock returning False, a DELETE on
     metadata of an instance must be refused (no access through the
     XForm or the Project).
     """
     has_perms_mock.return_value = False
     requester = User(username="******")
     submission = Instance(user=User(username="******"))
     submission.xform = XForm()
     delete_request = MagicMock(user=requester, method='DELETE')
     metadata = MagicMock(content_object=submission)
     granted = self.permissions.has_object_permission(
         delete_request, self.view, metadata)
     self.assertFalse(granted)
Exemple #8
0
 def test_delete_instance_metadata_without_perms(self, has_perms_mock):
     """
     Deleting instance metadata is denied when the mocked permission
     check says the user is allowed neither through the XForm nor
     through the Project.
     """
     has_perms_mock.return_value = False
     acting_user = User(username="******")
     target = Instance(user=User(username="******"))
     target.xform = XForm()
     req = MagicMock(user=acting_user, method='DELETE')
     meta = MagicMock(content_object=target)
     check = self.permissions.has_object_permission
     self.assertFalse(check(req, self.view, meta))
    def test_merged_datasets_retrieve(self):
        """Retrieve one merged dataset via the merged-form view and the
        plain form view."""
        dataset = self._create_merged_dataset(geo=True)
        dataset_pk = dataset['id']
        merged_xform = MergedXForm.objects.get(pk=dataset_pk)

        # Submit once to form b and record the submission time on the form.
        form_b = merged_xform.xforms.all()[1]
        submission = Instance(
            xform=form_b, xml='<data id="b"><fruit>mango</fruit></data>')
        submission.save()
        form_b.refresh_from_db()
        form_b.last_submission_time = submission.date_created
        form_b.save()

        retrieve_merged = MergedXFormViewSet.as_view({'get': 'retrieve'})
        plain_request = self.factory.get('/')

        # 404 when the pk doesn't exist.
        response = retrieve_merged(plain_request, pk=(1000 * dataset_pk))
        self.assertEqual(response.status_code, 404)

        # 404 when the pk exists but the user is not authenticated.
        response = retrieve_merged(plain_request, pk=dataset_pk)
        self.assertEqual(response.status_code, 404)

        # 200 when the pk exists and the user is authenticated.
        authed_request = self.factory.get('/', **self.extra)
        response = retrieve_merged(authed_request, pk=dataset_pk)
        self.assertEqual(response.status_code, 200)

        # The payload carries the expected fields.
        for field in ('id', 'title', 'xforms'):
            self.assertIn(field, response.data)
        self.assertEqual(response.data['num_of_submissions'], 1)
        self.assertEqual(
            response.data['last_submission_time'],
            form_b.last_submission_time.isoformat())

        # The merged dataset is also served by the api/forms/[pk] endpoint.
        authed_request = self.factory.get('/', **self.extra)
        retrieve_form = XFormViewSet.as_view({'get': 'retrieve'})
        response = retrieve_form(authed_request, pk=dataset_pk)
        payload = response.data
        self.assertEqual(response.status_code, 200)
        self.assertEqual(dataset_pk, payload['formid'])
        self.assertIn('is_merged_dataset', payload)
        self.assertTrue(payload['is_merged_dataset'])
        self.assertTrue(payload['instances_with_geopoints'])
        self.assertEqual(payload['num_of_submissions'], 1)
        self.assertEqual(
            payload['last_submission_time'],
            form_b.last_submission_time.isoformat())
    def test_merged_datasets_retrieve(self):
        """Fetch a single merged dataset, first through MergedXFormViewSet
        and then through XFormViewSet."""
        created = self._create_merged_dataset(geo=True)
        merged_pk = created['id']
        merged_xform = MergedXForm.objects.get(pk=merged_pk)

        # Make a submission to form b and store its creation time on the form.
        form_b = merged_xform.xforms.all()[1]
        mango = Instance(
            xform=form_b, xml='<data id="b"><fruit>mango</fruit></data>')
        mango.save()
        form_b.refresh_from_db()
        form_b.last_submission_time = mango.date_created
        form_b.save()

        merged_view = MergedXFormViewSet.as_view({'get': 'retrieve'})
        request = self.factory.get('/')

        # Non-existent pk: 404.
        missing = merged_view(request, pk=(1000 * merged_pk))
        self.assertEqual(missing.status_code, 404)

        # Existing pk, user not authenticated: 404.
        anonymous = merged_view(request, pk=merged_pk)
        self.assertEqual(anonymous.status_code, 404)

        # Existing pk, user authenticated: 200.
        request = self.factory.get('/', **self.extra)
        found = merged_view(request, pk=merged_pk)
        self.assertEqual(found.status_code, 200)

        # Expected fields are present.
        self.assertIn('id', found.data)
        self.assertIn('title', found.data)
        self.assertIn('xforms', found.data)
        self.assertEqual(found.data['num_of_submissions'], 1)
        self.assertEqual(
            found.data['last_submission_time'],
            form_b.last_submission_time.isoformat())

        # The same dataset must be reachable at the api/forms/[pk] endpoint.
        request = self.factory.get('/', **self.extra)
        form_view = XFormViewSet.as_view({'get': 'retrieve'})
        as_form = form_view(request, pk=merged_pk)
        self.assertEqual(as_form.status_code, 200)
        self.assertEqual(merged_pk, as_form.data['formid'])
        self.assertIn('is_merged_dataset', as_form.data)
        for truthy_key in ('is_merged_dataset', 'instances_with_geopoints'):
            self.assertTrue(as_form.data[truthy_key])
        self.assertEqual(as_form.data['num_of_submissions'], 1)
        self.assertEqual(
            as_form.data['last_submission_time'],
            form_b.last_submission_time.isoformat())
Exemple #11
0
    def handle(self, *args, **options):
        """Rewrite the XML of the instances of a project form that match a pattern.

        Reads ``project_fxf`` and ``pattern`` from ``options``. For every
        instance whose XML matches, the current XML is stored in an
        ``InstanceHistory`` row, the XML is rewritten by
        ``replace_all_pattern``, the database row receives the new XML and
        its hash, and ``update_mongo`` is called with the updated object.
        Progress messages are written to ``self.stderr``.
        """
        project_fxf = options["project_fxf"]
        pattern = options["pattern"]
        instances = Instance.objects.filter(
            fieldsight_instance__project_fxf__pk=project_fxf).only('xml')
        matches = instances.annotate(match=Func(
            F('xml'), Value(pattern), function='regexp_matches')).values_list(
                'pk', 'match')

        instance_ids = [pk for pk, _match in matches]

        if not instance_ids:
            self.stderr.write('No Instances found.')
            return
        self.stderr.write('{} instance found for  pattern {}'.format(
            len(instance_ids), pattern))

        for instance_id in instance_ids:
            queryset = Instance.objects.filter(pk=instance_id).only('xml')
            # Indexing an unevaluated queryset runs a query on every access,
            # so the row is fetched once and the object reused below.
            instance = queryset[0]
            InstanceHistory(xform_instance=instance, xml=instance.xml).save()
            fixed_xml = replace_all_pattern(project_fxf, instance.xml)
            new_xml_hash = Instance.get_hash(fixed_xml)
            queryset.update(xml=fixed_xml, xml_hash=new_xml_hash)
            # Mirror the values just written so update_mongo sees them.
            instance.xml = fixed_xml
            instance.xml_hash = new_xml_hash
            update_mongo(instance)

        # Reports the id of the last instance processed.
        self.stderr.write('\nFinished {} '.format(instance_id, ))
Exemple #12
0
    def test_upload_to_with_anonymous_user(self):
        """Check the path built by ``upload_to`` for media metadata.

        With a user on the instance the path starts with that user's
        username; without one it starts with the username of the form's
        user.
        """
        instance = Instance(user=self.user, xform=self.xform)
        metadata = MetaData(data_type="media")
        metadata.content_object = instance
        filename = "filename"
        # assertEqual is used because the assertEquals alias is deprecated
        # and no longer exists in Python 3.12.
        self.assertEqual(upload_to(metadata, filename),
                         "{}/{}/{}".format(self.user.username,
                                           'formid-media',
                                           filename))

        # test instance with anonymous user
        instance_without_user = Instance(xform=self.xform)
        metadata.content_object = instance_without_user
        self.assertEqual(upload_to(metadata, filename),
                         "{}/{}/{}".format(self.xform.user.username,
                                           'formid-media',
                                           filename))
Exemple #13
0
def create_instance(username,
                    xml_file,
                    media_files,
                    status=u'submitted_via_web',
                    uuid=None,
                    date_created_override=None,
                    request=None):
    """
    Store a submission, or attach new media to an identical earlier one.

    Submission cases:
        If there is a username and no uuid, submitting an old ODK form.
        If there is a username and a uuid, submitting a new ODK form.

    :param username: form owner's username; lower-cased before the lookup.
    :param xml_file: file-like object whose ``read()`` yields the XML.
    :param media_files: attachments handed to ``save_attachments`` or
        ``save_submission``.
    :param status: passed to ``save_submission``.
    :param uuid: passed to ``get_xform_from_submission``.
    :param date_created_override: passed to ``save_submission``.
    :param request: optional request; supplies the submitting user and is
        passed to ``check_submission_permissions``.
    :returns: the existing instance when a duplicate received new
        attachments, otherwise the result of ``save_submission``.
    :raises DuplicateInstance: when the XML duplicates an existing
        submission and ``save_attachments`` reports nothing new.
    """
    submitted_by = request.user \
        if request and request.user.is_authenticated() else None

    if username:
        username = username.lower()

    xml = xml_file.read()
    xml_hash = Instance.get_hash(xml)
    xform = get_xform_from_submission(xml, username, uuid)
    check_submission_permissions(request, xform)

    # Dorey's rule from 2012 (commit 890a67aa):
    #   Ignore submission as a duplicate IFF
    #    * a submission's XForm collects start time
    #    * the submitted XML is an exact match with one that
    #      has already been submitted for that user.
    if xform.has_start_time:
        # XML matches are identified by identical content hash OR, when a
        # content hash is not present, by string comparison of the full
        # content, which is slow! Use the management command
        # `populate_xml_hashes_for_instances` to hash existing submissions
        existing_instance = Instance.objects.filter(
            Q(xml_hash=xml_hash)
            | Q(xml_hash=Instance.DEFAULT_XML_HASH, xml=xml),
            xform__user=xform.user,
        ).first()
    else:
        existing_instance = None

    # uuid carried by the submitted XML
    new_uuid = get_uuid_from_xml(xml)

    if existing_instance:
        # ensure we have saved the extra attachments
        any_new_attachment = save_attachments(existing_instance, media_files)
        if not any_new_attachment:
            raise DuplicateInstance()
        # Update Mongo via the related ParsedInstance. ``async`` is a
        # reserved word from Python 3.7 on, so the keyword is passed through
        # dict unpacking, which is valid syntax on every Python version.
        existing_instance.parsed_instance.save(**{'async': False})
        return existing_instance

    return save_submission(xform, xml, media_files, new_uuid,
                           submitted_by, status, date_created_override)
    def test_submissions_not_allowed(self):
        """Saving an instance against a merged form must raise
        FormIsMergedDatasetError."""
        dataset = self._create_merged_dataset()
        merged_form = XForm.objects.get(pk=dataset['id'])
        orange_xml = '<data id="a"><fruit>orange</fruit></data>'

        # The submission targets the merged form itself, not a parent form.
        with self.assertRaises(FormIsMergedDatasetError):
            submission = Instance(xform=merged_form, xml=orange_xml)
            submission.save()
Exemple #15
0
def create_instance(fsxfid, xml_file, media_files,
                    status=u'submitted_via_web', uuid=None,
                    date_created_override=None, request=None, site=None,
                    fs_proj_xf=None, proj_id=None, xform=None,
                    flagged_instance=None):
    """Store a FieldSight submission inside one database transaction.

    When the form collects a start time and an identical XML already exists
    for the form's user, new attachments are added to that instance and its
    FieldSight parsed instance is fetched or created; otherwise a new
    submission is saved.

    :param fsxfid: FieldSight form id; stored as a string.
    :param xml_file: file-like object whose ``read()`` yields the XML.
    :param media_files: attachments handed to ``save_attachments`` or
        ``save_submission``.
    :param status: passed to ``save_submission``.
    :param uuid: ignored; the uuid found in the XML is always used.
    :param date_created_override: passed to ``save_submission``.
    :param request: optional request supplying the submitting user.
    :param site: site value; stored with the submission.
    :param fs_proj_xf: project-level form value; stored with the submission.
    :param proj_id: project value; stored with the submission.
    :param xform: the form submitted to; its ``has_start_time`` and
        ``user`` attributes are read, so it must not be ``None``.
    :param flagged_instance: passed to ``save_submission``.
    :returns: the existing instance when a duplicate received new
        attachments, otherwise the result of ``save_submission``.
    :raises DuplicateInstance: when the XML duplicates an existing
        submission and ``save_attachments`` reports nothing new.
    """
    with transaction.atomic():
        submitted_by = request.user \
            if request and request.user.is_authenticated() else None
        xml = xml_file.read()
        xml_hash = Instance.get_hash(xml)

        if xform.has_start_time:
            # XML matches are identified by identical content hash OR, when a
            # content hash is not present, by string comparison of the full
            # content, which is slow! Use the management command
            # `populate_xml_hashes_for_instances` to hash existing submissions
            existing_instance = Instance.objects.filter(
                Q(xml_hash=xml_hash) |
                Q(xml_hash=Instance.DEFAULT_XML_HASH, xml=xml),
                xform__user=xform.user,
            ).first()
        else:
            existing_instance = None

        # The uuid embedded in the XML replaces the ``uuid`` argument.
        uuid = get_uuid_from_xml(xml)

        if existing_instance:
            # ensure we have saved the extra attachments
            any_new_attachment = save_attachments(existing_instance,
                                                  media_files)
            if not any_new_attachment:
                raise DuplicateInstance()

            # Update Mongo via the related ParsedInstance
            fs_poj_id = str(fs_proj_xf) if fs_proj_xf else ""
            # NOTE(review): unlike the new-submission path below, ``fsxfid``
            # and ``site`` are not defaulted to "" here, so ``None`` is
            # stored as 'None' / None. Confirm this is intended.
            FieldSightParsedInstance.get_or_create(
                existing_instance,
                update_data={'fs_uuid': str(fsxfid), 'fs_status': 0,
                             'fs_site': site,
                             'fs_project': proj_id,
                             'fs_project_uuid': fs_poj_id})
            return existing_instance

        if fsxfid is None:
            fsxfid = ""
        if site is None:
            site = ""
        return save_submission(xform, xml, media_files, uuid,
                               submitted_by, status,
                               date_created_override, str(fsxfid), str(site),
                               fs_proj_xf, proj_id, flagged_instance)
Exemple #16
0
def _get_instance(
    request: 'rest_framework.request.Request',
    xml: str,
    new_uuid: str,
    status: str,
    xform: XForm,
    defer_counting: bool = False,
) -> Instance:
    """
    Return the saved `Instance` for a submission: the edited existing one
    when the deprecated uuid in `xml` matches a stored instance, otherwise
    a newly created one.

    Passing `defer_counting=True` sets a Python-only attribute of the same
    name on the *new* `Instance` if one is created. This will prevent
    `update_xform_submission_count()` from doing anything, which avoids
    locking any rows in `logger_xform` or `main_userprofile`.
    """
    deprecated_uuid = get_deprecated_uuid_from_xml(xml)
    matching = Instance.objects.filter(uuid=deprecated_uuid)

    if not matching:
        # New submission: record the requesting user only when authenticated.
        submitted_by = None
        if request and request.user.is_authenticated:
            submitted_by = request.user
        # `Instance.objects.create()` is avoided so that the Python-only
        # `defer_counting` attribute can be set before saving.
        created = Instance()
        created.xml = xml
        created.user = submitted_by
        created.status = status
        created.xform = xform
        if defer_counting:
            # The attribute is only ever set to `True`, never to `False`.
            created.defer_counting = True
        created.save()
        return created

    # Edit of an existing submission: archive the old XML, then overwrite.
    edited = matching[0]
    check_edit_submission_permissions(request, xform, edited)
    InstanceHistory.objects.create(
        xml=edited.xml, xform_instance=edited, uuid=deprecated_uuid)
    edited.xml = xml
    edited._populate_xml_hash()
    edited.uuid = new_uuid
    edited.save()
    return edited
Exemple #17
0
def create_instance(username, xml_file, media_files,
                    status=u'submitted_via_web', uuid=None,
                    date_created_override=None, request=None):
    """
    Save a submission, or add new media to an identical earlier submission.

    Submission cases:
        If there is a username and no uuid, submitting an old ODK form.
        If there is a username and a uuid, submitting a new ODK form.

    :param username: form owner's username; lower-cased before the lookup.
    :param xml_file: file-like object whose ``read()`` yields the XML.
    :param media_files: attachments handed to ``save_attachments`` or
        ``save_submission``.
    :param status: passed to ``save_submission``.
    :param uuid: passed to ``get_xform_from_submission``.
    :param date_created_override: passed to ``save_submission``.
    :param request: optional request; supplies the submitting user and is
        passed to ``check_submission_permissions``.
    :returns: the existing instance when a duplicate received new
        attachments, otherwise the result of ``save_submission``.
    :raises DuplicateInstance: when the XML duplicates an existing
        submission and ``save_attachments`` reports nothing new.
    """
    submitted_by = request.user \
        if request and request.user.is_authenticated() else None

    if username:
        username = username.lower()

    xml = xml_file.read()
    xml_hash = Instance.get_hash(xml)
    xform = get_xform_from_submission(xml, username, uuid)
    check_submission_permissions(request, xform)

    # Dorey's rule from 2012 (commit 890a67aa):
    #   Ignore submission as a duplicate IFF
    #    * a submission's XForm collects start time
    #    * the submitted XML is an exact match with one that
    #      has already been submitted for that user.
    if xform.has_start_time:
        # XML matches are identified by identical content hash OR, when a
        # content hash is not present, by string comparison of the full
        # content, which is slow! Use the management command
        # `populate_xml_hashes_for_instances` to hash existing submissions
        existing_instance = Instance.objects.filter(
            Q(xml_hash=xml_hash) | Q(xml_hash=Instance.DEFAULT_XML_HASH, xml=xml),
            xform__user=xform.user,
        ).first()
    else:
        existing_instance = None

    # uuid carried by the submitted XML
    new_uuid = get_uuid_from_xml(xml)

    if existing_instance:
        # ensure we have saved the extra attachments
        any_new_attachment = save_attachments(existing_instance, media_files)
        if not any_new_attachment:
            raise DuplicateInstance()
        # Update Mongo via the related ParsedInstance. Because ``async``
        # became a reserved word in Python 3.7, the keyword goes through
        # dict unpacking, which parses on every Python version.
        existing_instance.parsed_instance.save(**{'async': False})
        return existing_instance

    return save_submission(xform, xml, media_files, new_uuid,
                           submitted_by, status,
                           date_created_override)
Exemple #18
0
def _get_instance(xml,
                  new_uuid,
                  submitted_by,
                  status,
                  xform,
                  defer_counting=False):
    """
    Return the saved `Instance` for a submission, editing the stored one
    when the deprecated uuid in `xml` matches, otherwise creating a new one.

    Passing `defer_counting=True` sets a Python-only attribute of the same
    name on the *new* `Instance` if one is created. This will prevent
    `update_xform_submission_count()` from doing anything, which avoids
    locking any rows in `logger_xform` or `main_userprofile`.
    """
    deprecated_uuid = get_deprecated_uuid_from_xml(xml)
    candidates = Instance.objects.filter(uuid=deprecated_uuid)

    if not candidates:
        # New submission. `Instance.objects.create()` is avoided so that the
        # Python-only `defer_counting` attribute can be set before saving.
        fresh = Instance()
        fresh.xml = xml
        fresh.user = submitted_by
        fresh.status = status
        fresh.xform = xform
        if defer_counting:
            # The attribute is only ever set to `True`, never to `False`.
            fresh.defer_counting = True
        fresh.save()
        return fresh

    # Edit: check permission, archive the previous XML, then overwrite it.
    check_edit_submission_permissions(submitted_by, xform)
    current = candidates[0]
    InstanceHistory.objects.create(
        xml=current.xml, xform_instance=current, uuid=deprecated_uuid)
    current.xml = xml
    current._populate_xml_hash()
    current.uuid = new_uuid
    current.save()
    return current
    def test_merged_datasets_deleted_parent_retrieve(self):
        """Retrieve a merged dataset after all of its parent forms have been
        deleted."""
        dataset = self._create_merged_dataset(geo=True)
        dataset_pk = dataset['id']
        merged_xform = MergedXForm.objects.get(pk=dataset_pk)

        # Submit once to form b and record the submission time on the form.
        form_b = merged_xform.xforms.all()[1]
        submission = Instance(
            xform=form_b, xml='<data id="b"><fruit>mango</fruit></data>')
        submission.save()
        form_b.refresh_from_db()
        form_b.last_submission_time = submission.date_created
        form_b.save()

        # 200 when the pk exists and the user is authenticated.
        retrieve_merged = MergedXFormViewSet.as_view({'get': 'retrieve'})
        authed_request = self.factory.get('/', **self.extra)
        response = retrieve_merged(authed_request, pk=dataset_pk)
        self.assertEqual(response.status_code, 200)

        # Delete every parent form, then reload the merged form.
        for parent in merged_xform.xforms.all():
            parent.delete()
        merged_xform.refresh_from_db()

        # The merged dataset must still be served at api/forms/[pk].
        authed_request = self.factory.get('/', **self.extra)
        retrieve_form = XFormViewSet.as_view({'get': 'retrieve'})
        response = retrieve_form(authed_request, pk=dataset_pk)
        payload = response.data
        self.assertEqual(response.status_code, 200)
        self.assertEqual(dataset_pk, payload['formid'])
        self.assertTrue(payload['is_merged_dataset'])
        self.assertTrue(payload['instances_with_geopoints'])
        # With the parents deleted there are no submissions left.
        self.assertEqual(payload['num_of_submissions'], 0)
    def test_merged_datasets_deleted_parent_retrieve(self):
        """A merged dataset stays retrievable, with zero submissions, once
        its parent forms are deleted."""
        created = self._create_merged_dataset(geo=True)
        merged_pk = created['id']
        merged_xform = MergedXForm.objects.get(pk=merged_pk)

        # Make a submission to form b and store its creation time on the form.
        form_b = merged_xform.xforms.all()[1]
        mango = Instance(
            xform=form_b, xml='<data id="b"><fruit>mango</fruit></data>')
        mango.save()
        form_b.refresh_from_db()
        form_b.last_submission_time = mango.date_created
        form_b.save()
        merged_view = MergedXFormViewSet.as_view({'get': 'retrieve'})

        # Existing pk, authenticated user: 200.
        request = self.factory.get('/', **self.extra)
        before = merged_view(request, pk=merged_pk)
        self.assertEqual(before.status_code, 200)

        # Remove the parents one by one and refresh the merged form.
        for parent_form in merged_xform.xforms.all():
            parent_form.delete()
        merged_xform.refresh_from_db()

        # The api/forms/[pk] endpoint still returns the merged dataset.
        request = self.factory.get('/', **self.extra)
        form_view = XFormViewSet.as_view({'get': 'retrieve'})
        after = form_view(request, pk=merged_pk)
        self.assertEqual(after.status_code, 200)
        self.assertEqual(merged_pk, after.data['formid'])
        for truthy_key in ('is_merged_dataset', 'instances_with_geopoints'):
            self.assertTrue(after.data[truthy_key])
        # Deleted parents, so 0 submissions.
        self.assertEqual(after.data['num_of_submissions'], 0)
Exemple #21
0
def _get_instance(xml, new_uuid, submitted_by, status, xform,
                  defer_counting=False):
    '''
    Return the saved `Instance` for a submission, editing an existing one
    when the XML carries a deprecated UUID that matches a stored `Instance`,
    and creating a new one otherwise.

    `defer_counting=True` sets a Python-only attribute of the same name on
    the *new* `Instance` if one is created. Per the original author's note,
    this prevents `update_xform_submission_count()` from doing anything,
    which avoids locking any rows in `logger_xform` or `main_userprofile`.
    The flag has no effect on the edit path.
    '''
    deprecated_uuid = get_deprecated_uuid_from_xml(xml)
    previous_versions = Instance.objects.filter(uuid=deprecated_uuid)

    if not previous_versions:
        # New submission. `Instance.objects.create()` is avoided so that the
        # Python-only `defer_counting` attribute can be set before saving.
        created = Instance()
        created.xml = xml
        created.user = submitted_by
        created.status = status
        created.xform = xform
        if defer_counting:
            # Only ever set to `True`; the attribute is simply left absent
            # when deferral was not requested.
            created.defer_counting = True
        created.save()
        return created

    # Edit of an existing submission: archive the current XML under the old
    # UUID, then overwrite it with the incoming XML and the new UUID.
    check_edit_submission_permissions(submitted_by, xform)
    edited = previous_versions[0]
    InstanceHistory.objects.create(
        xml=edited.xml, xform_instance=edited, uuid=deprecated_uuid)
    edited.xml = xml
    edited._populate_xml_hash()
    edited.uuid = new_uuid
    edited.save()
    return edited
Exemple #22
0
    def get_simple_instance(self, custom_values=None):
        """
        Build an unsaved `Instance` for the "WaterSimple" form.

        The form is created with `create_simple_xform()` if no XForm titled
        "WaterSimple" exists yet. `custom_values` may override any of the
        default answers; `start` and `end`, when given, must be datetime-like
        objects (they are formatted with `XFORM_TIME_FORMAT`). If only
        `start` is given, `end` defaults to one hour after it.

        The caller's dictionary is never modified.
        """
        simple_xforms = XForm.objects.filter(title="WaterSimple")
        if simple_xforms.exists():
            xf = simple_xforms[0]
        else:
            xf = self.create_simple_xform()

        # these values can be overridden with custom values
        values = {
            'device_id': '12345',
            'start': '2011-01-01T09:50:06.966',
            'end': '2011-01-01T09:53:22.965',
            'geopoint': '40.783594633609184 -73.96436698913574 300.0 4.0'
        }

        # Work on a private copy: the timestamps below are replaced by their
        # string form, which must not leak back into the caller's dict (a
        # second call with the same dict would fail on `.strftime`).
        overrides = dict(custom_values) if custom_values else {}

        if 'start' in overrides:
            st = overrides['start']
            overrides['start'] = st.strftime(XFORM_TIME_FORMAT)

            # if no end_time is specified, defaults to 1 hour
            values['end'] = (st + ONE_HOUR).strftime(XFORM_TIME_FORMAT)

        if 'end' in overrides:
            overrides['end'] = overrides['end'].strftime(XFORM_TIME_FORMAT)

        values.update(overrides)

        water_simple_survey = _load_simple_survey_object()
        simple_survey = water_simple_survey.instantiate()

        for k, v in values.items():
            simple_survey.answer(name=k, value=v)

        # setting the id_string so that it doesn't end up
        # with the timestamp of the new survey object
        simple_survey._id = xf.id_string

        return Instance(xml=simple_survey.to_xml())
Exemple #23
0
    def get_registration_instance(self, custom_values=None):
        """
        Build an unsaved `Instance` for the "registration" form.

        1. Checks to see if the registration form has been created already.
           If not, it loads it in with `create_registration_xform()`.
        2. Loads a registration instance, answering it with the default
           values overridden by `custom_values`.

        `start` and `end`, when given in `custom_values`, must be
        datetime-like objects (they are formatted with `XFORM_TIME_FORMAT`).
        If only `start` is given, `end` defaults to one hour after it.

        The caller's dictionary is never modified.
        """
        registration_xforms = XForm.objects.filter(title="registration")
        if registration_xforms.exists():
            xf = registration_xforms[0]
        else:
            xf = self.create_registration_xform()

        values = {
            'device_id': '12345',
            'start': '2011-01-01T09:50:06.966',
            'end': '2011-01-01T09:53:22.965'
        }

        # Work on a private copy: the timestamps below are replaced by their
        # string form, which must not leak back into the caller's dict (a
        # second call with the same dict would fail on `.strftime`).
        overrides = dict(custom_values) if custom_values else {}

        if 'start' in overrides:
            st = overrides['start']
            overrides['start'] = st.strftime(XFORM_TIME_FORMAT)

            # if no end_time is specified, defaults to 1 hour
            values['end'] = (st + ONE_HOUR).strftime(XFORM_TIME_FORMAT)

        if 'end' in overrides:
            overrides['end'] = overrides['end'].strftime(XFORM_TIME_FORMAT)

        values.update(overrides)

        reg_xform = _load_registration_survey_object()
        reg_instance = reg_xform.instantiate()
        reg_instance._id = xf.id_string

        for k, v in values.items():
            reg_instance.answer(name=k, value=v)

        return Instance(xml=reg_instance.to_xml())
Exemple #24
0
def create_instance(username,
                    xml_file,
                    media_files,
                    status='submitted_via_web',
                    uuid=None,
                    date_created_override=None,
                    request=None):
    """
    Store the submission read from `xml_file`, or attach `media_files` to an
    already-stored submission with identical XML.

    Submission cases:
        If there is a username and no uuid, submitting an old ODK form.
        If there is a username and a uuid, submitting a new ODK form.

    Returns the new `Instance`, or the existing one when at least one new
    attachment was saved for it. Raises `DuplicateInstance` when the XML
    matches an existing submission and no new attachment was saved.
    """
    if request and request.user.is_authenticated():
        submitted_by = request.user
    else:
        submitted_by = None

    if username:
        username = username.lower()

    xml = xml_file.read()
    xml_hash = Instance.get_hash(xml)
    xform = get_xform_from_submission(xml, username, uuid)
    check_submission_permissions(request, xform)

    # get new and deprecated uuid's
    new_uuid = get_uuid_from_xml(xml)

    # Dorey's rule from 2012 (commit 890a67aa):
    #   Ignore submission as a duplicate IFF
    #    * a submission's XForm collects start time
    #    * the submitted XML is an exact match with one that
    #      has already been submitted for that user.
    # The start-time requirement protected submissions with identical
    # responses from being rejected as duplicates *before* KoBoCAT had the
    # concept of submission UUIDs. Nowadays, OpenRosa requires clients to
    # send a UUID (in `<instanceID>`) within every submission; if the
    # incoming XML has a UUID and still exactly matches an existing
    # submission, it's certainly a duplicate
    # (https://docs.opendatakit.org/openrosa-metadata/#fields).
    existing_instance = None
    if xform.has_start_time or new_uuid is not None:
        # A match is an identical content hash OR, for rows that were never
        # hashed, an identical XML string. The string comparison is slow;
        # the management command `populate_xml_hashes_for_instances` hashes
        # existing submissions.
        same_hash = Q(xml_hash=xml_hash)
        same_unhashed_xml = Q(xml_hash=Instance.DEFAULT_XML_HASH, xml=xml)
        existing_instance = Instance.objects.filter(
            same_hash | same_unhashed_xml,
            xform__user=xform.user,
        ).first()

    if not existing_instance:
        return save_submission(xform, xml, media_files, new_uuid,
                               submitted_by, status, date_created_override)

    # The XML is already stored; make sure any extra attachments are saved.
    if not save_attachments(existing_instance, media_files):
        raise DuplicateInstance()

    # Update Mongo via the related ParsedInstance
    existing_instance.parsed_instance.save(asynchronous=False)
    return existing_instance
Exemple #25
0
def create_instance(username, xml_file, media_files,
                    status=u'submitted_via_web', uuid=None,
                    date_created_override=None, request=None):
    """
    Store the submission read from `xml_file` unless it duplicates one that
    is already stored.

    Two duplicate checks run inside a single atomic block:
      1. identical XML content for the same form owner, rejected only when
         the matching instance has no XForm or its XForm collects start time;
      2. an existing instance with the same UUID, in which case the media
         files are attached to it and the submission is rejected afterwards.

    Submission cases:
        If there is a username and no uuid, submitting an old ODK form.
        If there is a username and a uuid, submitting a new ODK form.

    Returns the newly saved instance; raises `DuplicateInstance` otherwise.
    """
    with transaction.atomic():
        if request and request.user.is_authenticated():
            submitted_by = request.user
        else:
            submitted_by = None

        if username:
            username = username.lower()

        xml = xml_file.read()
        xml_hash = Instance.get_hash(xml)
        xform = get_xform_from_submission(xml, username, uuid)
        check_submission_permissions(request, xform)

        # Content duplicates share a content hash OR, when the stored row was
        # never hashed, the full XML string.
        same_content = Instance.objects.filter(
            Q(xml_hash=xml_hash) |
            Q(xml_hash=Instance.DEFAULT_XML_HASH, xml=xml),
            xform__user=xform.user,
        )
        try:
            # `filter()` is lazy, so this index access is the only query
            existing_instance = same_content[0]
        except IndexError:
            # No duplicate found
            existing_instance = None

        if existing_instance is not None:
            matched_xform = existing_instance.xform
            if not matched_xform or matched_xform.has_start_time:
                # Ignore submission as a duplicate IFF
                #  * a submission's XForm collects start time
                #  * the submitted XML is an exact match with one that
                #    has already been submitted for that user.
                raise DuplicateInstance()

        # get new and deprecated uuid's
        new_uuid = get_uuid_from_xml(xml)
        # TODO: Make sure `uuid` is indexed by the DB!
        duplicate_instances = Instance.objects.filter(uuid=new_uuid)

        if not duplicate_instances:
            return save_submission(xform, xml, media_files, new_uuid,
                                   submitted_by, status,
                                   date_created_override)

        # ensure we have saved the extra attachments
        for f in media_files:
            Attachment.objects.get_or_create(
                instance=duplicate_instances[0],
                media_file=f, mimetype=f.content_type)

    if duplicate_instances:
        # We are now outside the atomic block, so we can raise an exception
        # without rolling back the extra attachments we created earlier
        # NB: Since `ATOMIC_REQUESTS` is set at the database level, everything
        # could still be rolled back if the calling view fails to handle an
        # exception
        raise DuplicateInstance()
    def test_merged_datasets_data(self):
        """Test retrieving data of a merged dataset"""
        merged_dataset = self._create_merged_dataset()
        dataset_pk = merged_dataset['id']
        request = self.factory.get('/', **self.extra)
        view = MergedXFormViewSet.as_view({'get': 'data'})
        merged_xform = MergedXForm.objects.get(pk=dataset_pk)
        detail_view = MergedXFormViewSet.as_view({'get': 'retrieve'})
        xform_detail_view = XFormViewSet.as_view({'get': 'retrieve'})

        def fetch_ok(endpoint):
            """Call `endpoint` for the merged dataset and require HTTP 200."""
            result = endpoint(request, pk=dataset_pk)
            self.assertEqual(result.status_code, 200)
            return result

        # nothing submitted yet: no data rows, zero submissions
        self.assertEqual(len(fetch_ok(view).data), 0)
        self.assertEqual(fetch_ok(detail_view).data['num_of_submissions'], 0)

        # make submission to form a
        form_a = merged_xform.xforms.all()[0]
        Instance(
            xform=form_a,
            xml='<data id="a"><fruit>orange</fruit></data>').save()
        response = fetch_ok(view)
        self.assertEqual(len(response.data), 1)
        self.assertEqual([d['fruit'] for d in response.data], ['orange'])

        # check num_of_submissions
        self.assertEqual(fetch_ok(detail_view).data['num_of_submissions'], 1)

        # make submission to form b
        form_b = merged_xform.xforms.all()[1]
        last_submission = Instance(
            xform=form_b,
            xml='<data id="b"><fruit>mango</fruit></data>')
        last_submission.save()
        response = fetch_ok(view)
        self.assertEqual(len(response.data), 2)
        self.assertEqual(
            [d['fruit'] for d in response.data], ['orange', 'mango'])

        # /merged-datasets/[pk] first, then /forms/[pk]: both must report
        # two submissions and the creation time of the latest one
        for endpoint in (detail_view, xform_detail_view):
            response = fetch_ok(endpoint)
            self.assertEqual(response.data['num_of_submissions'], 2)
            self.assertEqual(response.data['last_submission_time'],
                             last_submission.date_created.isoformat())
    def handle(self, *args, **options):
        """
        Rewrite the XML of every selected Instance whose root node name
        differs from the root node name of its XForm.

        Instances may be narrowed with the `pk__gte`, `xform__id_string`,
        `xform__user__username` and `xml__contains` options. A tab-separated
        report of changed instances goes to stdout; progress and errors go to
        stderr. Only a single-database `psycopg2` configuration is accepted.
        """
        verbosity = options['verbosity']
        if len(list(connections)) > 1:
            raise NotImplementedError(
                "This management command does not support multiple-database "
                "configurations"
            )
        if connections['default'].Database.__name__ != 'psycopg2':
            raise NotImplementedError(
                "Only the `psycopg2` database backend is supported")

        instances = Instance.objects.all().order_by('pk')
        xforms = XForm.objects.all()
        xform_prefix = 'xform__'
        for lookup in ('pk__gte', 'xform__id_string',
                       'xform__user__username', 'xml__contains'):
            wanted = options[lookup]
            if not wanted:
                continue
            instances = instances.filter(**{lookup: wanted})
            if lookup.startswith(xform_prefix):
                # The same restriction, minus the prefix, narrows the XForms
                xforms = xforms.filter(
                    **{lookup[len(xform_prefix):]: wanted})

        instance_root = Func(
            F('xml'),
            Value(INSTANCE_ROOT_NODE_NAME_PATTERN),
            function='regexp_matches'
        )
        instances = instances.annotate(
            root_node_name=instance_root
        ).values_list('pk', 'xform_id', 'root_node_name')
        if not instances.exists():
            self.stderr.write('No Instances found.')
            return
        fetch_started = time.time()
        self.stderr.write(
            'Fetching Instances; please allow several minutes...', ending='')
        instances = list(instances)
        self.stderr.write('got {} in {} seconds.'.format(
            len(instances), int(time.time() - fetch_started)))

        # Getting the XForm root node names separately is far more efficient
        # than calling `regexp_matches` on `xform__xml` in the `Instance` query
        xform_root = Func(
            F('xml'),
            Value(XFORM_ROOT_NODE_NAME_PATTERN),
            function='regexp_matches'
        )
        xforms = xforms.annotate(
            root_node_name=xform_root
        ).values_list('pk', 'root_node_name')
        self.stderr.write('Fetching XForm root node names...', ending='')
        fetch_started = time.time()
        xform_root_node_names = dict(xforms)
        self.stderr.write('got {} in {} seconds.'.format(
            len(xform_root_node_names), int(time.time() - fetch_started)))

        completed_instances = 0
        changed_instances = 0
        failed_instances = 0
        progress_interval = 1  # second
        t0 = time.time()
        t_last = t0

        self.stdout.write(
            'Instance\tXForm\tOld Root Node Name\tNew Root Node Name')
        # Each row is (pk, xform_id, root_node_name matches)
        for instance_id, xform_id, instance_matches in instances:
            t_now = time.time()
            if (verbosity > 1 and t_now - t_last >= progress_interval
                    and completed_instances):
                t_last = t_now
                t_elapsed = t_now - t0
                progress = (
                    'Completed {} Instances: {} changed, {} failed; '
                    '{}s elapsed, {} Instance/sec.'.format(
                        completed_instances,
                        changed_instances,
                        failed_instances,
                        int(t_elapsed),
                        int(completed_instances / t_elapsed)
                    )
                )
                write_same_line(self.stderr, progress)

            # `regexp_matches` results come back as `list`s from the ORM
            xform_matches = xform_root_node_names[xform_id]

            # Every path below finishes this instance, so count it once here
            completed_instances += 1

            if len(instance_matches) != 1:
                self.stderr.write(
                    '!!! Failed to get root node name for Instance {}'.format(
                        instance_id)
                )
                failed_instances += 1
                continue
            if len(xform_matches) != 1:
                self.stderr.write(
                    '!!! Failed to get root node name for XForm {}'.format(
                        xform_id)
                )
                failed_instances += 1
                continue

            old_name = instance_matches[0]
            new_name = xform_matches[0]
            if old_name == new_name:
                continue

            # Load only the XML of this one row, fix it and store it together
            # with its recomputed hash
            queryset = Instance.objects.filter(pk=instance_id).only('xml')
            fixed_xml = replace_first_and_last(
                queryset[0].xml, old_name, new_name)
            queryset.update(
                xml=fixed_xml, xml_hash=Instance.get_hash(fixed_xml))
            self.stdout.write('{}\t{}\t{}\t{}'.format(
                instance_id, xform_id, old_name, new_name))
            changed_instances += 1

        self.stderr.write(
            '\nFinished {} Instances: {} changed, {} failed.'.format(
                completed_instances,
                changed_instances,
                failed_instances
            )
        )
        self.stdout.write(
            'At the start of processing, the last instance PK '
            'was {}.'.format(instance_id)
        )
Exemple #28
0
def create_instance(username,
                    xml_file,
                    media_files,
                    status=u'submitted_via_web',
                    uuid=None,
                    date_created_override=None,
                    request=None):
    """
    Save a new submission from `xml_file`, rejecting duplicates.

    A submission is rejected with `DuplicateInstance` when
      * its XML content matches a stored instance of the same form owner and
        that instance has no XForm or an XForm that collects start time, or
      * an instance with the same UUID already exists; in that case the
        media files are first attached to the existing instance.

    Submission cases:
        If there is a username and no uuid, submitting an old ODK form.
        If there is a username and a uuid, submitting a new ODK form.

    Returns the saved instance when the submission is not a duplicate.
    """
    with transaction.atomic():
        authenticated = request and request.user.is_authenticated()
        submitted_by = request.user if authenticated else None

        if username:
            username = username.lower()

        xml = xml_file.read()
        xml_hash = Instance.get_hash(xml)
        xform = get_xform_from_submission(xml, username, uuid)
        check_submission_permissions(request, xform)

        # Duplicate content is recognised by an identical content hash OR,
        # when a content hash is not present, by comparing the full XML
        hashed_match = Q(xml_hash=xml_hash)
        unhashed_match = Q(xml_hash=Instance.DEFAULT_XML_HASH, xml=xml)
        content_matches = Instance.objects.filter(
            hashed_match | unhashed_match,
            xform__user=xform.user,
        )
        try:
            # The lazy `filter()` above has not touched the database yet;
            # this index retrieval is the single query
            first_match = content_matches[0]
        except IndexError:
            # No duplicate found
            pass
        else:
            # Ignore submission as a duplicate IFF
            #  * a submission's XForm collects start time
            #  * the submitted XML is an exact match with one that
            #    has already been submitted for that user.
            if not first_match.xform or first_match.xform.has_start_time:
                raise DuplicateInstance()

        # get new and deprecated uuid's
        new_uuid = get_uuid_from_xml(xml)
        # TODO: Make sure `uuid` is indexed by the DB!
        duplicate_instances = Instance.objects.filter(uuid=new_uuid)

        if not duplicate_instances:
            instance = save_submission(xform, xml, media_files, new_uuid,
                                       submitted_by, status,
                                       date_created_override)
            return instance

        # ensure we have saved the extra attachments
        for f in media_files:
            Attachment.objects.get_or_create(
                instance=duplicate_instances[0],
                media_file=f,
                mimetype=f.content_type)

    if duplicate_instances:
        # We are now outside the atomic block, so we can raise an exception
        # without rolling back the extra attachments we created earlier
        # NB: Since `ATOMIC_REQUESTS` is set at the database level, everything
        # could still be rolled back if the calling view fails to handle an
        # exception
        raise DuplicateInstance()
Exemple #29
0
    def create_new_submission(self, request, site, form):
        """
        Save the uploaded `xml_submission_file` as a submission to the
        FieldSight form with primary key `form`, then log a notification.

        Args:
            request: the incoming request; its `FILES` supply the XML file
                and any media attachments.
            site: site identifier forwarded to `save_submission` for
                non-survey forms.
            form: primary key of the `FieldSightXF` being submitted to.

        Returns:
            A `Response` (HTTP 201) with the serialized submission, either
            newly created or an existing identical one that received new
            attachments.

        Raises:
            ParseError: no `xml_submission_file` was uploaded.
            DuplicateInstance: identical XML is already stored and
                `save_attachments` returned a falsy result, which the
                original code treats as "no new attachment".
        """
        # Imported locally so the block does not depend on a new
        # module-level import.
        from rest_framework.exceptions import ParseError

        fs_xf = FieldSightXF.objects.get(pk=form)
        xform = fs_xf.xf
        xml_file_list = self.request.FILES.pop('xml_submission_file', [])
        if not xml_file_list:
            # Previously this fell through to `None.read()` and surfaced as
            # an AttributeError (HTTP 500) instead of a client error.
            raise ParseError('No `xml_submission_file` was provided.')
        xml = xml_file_list[0].read()
        username = self.kwargs.get('username')
        user = get_object_or_404(User, username=username)
        media_files = request.FILES.values()
        new_uuid = get_uuid_from_xml(xml)
        site_id = site

        xml_hash = Instance.get_hash(xml)

        existing_instance = None
        if xform.has_start_time:
            # XML matches are identified by identical content hash OR, when a
            # content hash is not present, by string comparison of the full
            # content, which is slow! Use the management command
            # `populate_xml_hashes_for_instances` to hash existing submissions
            existing_instance = Instance.objects.filter(
                Q(xml_hash=xml_hash)
                | Q(xml_hash=Instance.DEFAULT_XML_HASH, xml=xml),
                xform__user=xform.user,
            ).first()

        if existing_instance:
            # ensure we have saved the extra attachments
            any_new_attachment = save_attachments(existing_instance,
                                                  media_files)
            if not any_new_attachment:
                raise DuplicateInstance()

            context = self.get_serializer_context()
            serializer = SubmissionSerializer(existing_instance,
                                              context=context)
            return Response(serializer.data,
                            headers=self.get_openrosa_headers(request),
                            status=status.HTTP_201_CREATED,
                            template_name=self.template_name)

        # Arguments shared by every `save_submission` variant below
        submission_kwargs = {
            'xform': xform,
            'xml': xml,
            'media_files': media_files,
            'new_uuid': new_uuid,
            'submitted_by': user,
            'status': 'submitted_via_web',
            'date_created_override': None,
        }

        with transaction.atomic():
            if fs_xf.is_survey:
                instance = save_submission(
                    fxid=None,
                    site=None,
                    fs_poj_id=fs_xf.id,
                    project=fs_xf.project.id,
                    **submission_kwargs
                )
            elif fs_xf.site:
                instance = save_submission(
                    fxid=fs_xf.id,
                    site=site_id,
                    **submission_kwargs
                )
            else:
                instance = save_submission(
                    fxid=None,
                    site=site_id,
                    fs_poj_id=fs_xf.id,
                    project=fs_xf.project.id,
                    **submission_kwargs
                )
                task_obj = CeleryTaskProgress.objects.create(
                    user=user,
                    description='Change site info',
                    task_type=25,
                    content_object=instance.fieldsight_instance)
                from onadata.apps.fieldsight.tasks import \
                    update_meta_details
                # Task id 0 is sent when no progress record is available
                task_id = task_obj.id if task_obj else 0
                update_meta_details.apply_async(
                    (fs_xf.id, instance.id, task_id, site_id),
                    countdown=1)

            noti_type = 16
            title = "new submission"

            # The log entry is scoped to the site when the submission has
            # one, otherwise to the project.
            fieldsight_instance = instance.fieldsight_instance
            if fieldsight_instance.site:
                extra_object = fieldsight_instance.site
                extra_message = ""
                project = extra_object.project
                log_site = extra_object
                organization = extra_object.project.organization
            else:
                extra_object = fieldsight_instance.project
                extra_message = "project"
                project = extra_object
                log_site = None
                organization = extra_object.organization

            instance.fieldsight_instance.logs.create(
                source=user,
                type=noti_type,
                title=title,
                organization=organization,
                project=project,
                site=log_site,
                extra_object=extra_object,
                extra_message=extra_message,
                content_object=instance.fieldsight_instance)

        context = self.get_serializer_context()
        serializer = SubmissionSerializer(instance, context=context)
        return Response(serializer.data,
                        headers=self.get_openrosa_headers(request),
                        status=status.HTTP_201_CREATED,
                        template_name=self.template_name)
Exemple #30
0
    def handle(self, *args, **options):
        """
        Make each selected Instance's XML root node name match the root node
        name of its XForm.

        Selection options: `pk__gte`, `xform__id_string`,
        `xform__user__username`, `xml__contains`. Changed instances are
        listed on stdout as tab-separated rows; progress and failures are
        written to stderr. Requires a single `psycopg2` database.
        """
        verbosity = options['verbosity']
        if len(list(connections)) > 1:
            raise NotImplementedError(
                "This management command does not support multiple-database "
                "configurations")
        backend_name = connections['default'].Database.__name__
        if backend_name != 'psycopg2':
            raise NotImplementedError(
                "Only the `psycopg2` database backend is supported")

        instances = Instance.objects.all().order_by('pk')
        xforms = XForm.objects.all()
        supported_filters = (
            'pk__gte',
            'xform__id_string',
            'xform__user__username',
            'xml__contains',
        )
        for option in supported_filters:
            if not options[option]:
                continue
            instances = instances.filter(**{option: options[option]})
            if option.startswith('xform__'):
                # Apply the equivalent filter directly to the XForms
                xform_lookup = option[len('xform__'):]
                xforms = xforms.filter(**{xform_lookup: options[option]})

        instances = instances.annotate(
            root_node_name=Func(
                F('xml'),
                Value(INSTANCE_ROOT_NODE_NAME_PATTERN),
                function='regexp_matches',
            )
        ).values_list('pk', 'xform_id', 'root_node_name')
        if not instances.exists():
            self.stderr.write('No Instances found.')
            return
        t0 = time.time()
        self.stderr.write(
            'Fetching Instances; please allow several minutes...', ending='')
        instances = list(instances)
        seconds_taken = int(time.time() - t0)
        self.stderr.write(
            'got {} in {} seconds.'.format(len(instances), seconds_taken))

        # Getting the XForm root node names separately is far more efficient
        # than calling `regexp_matches` on `xform__xml` in the `Instance` query
        xforms = xforms.annotate(
            root_node_name=Func(
                F('xml'),
                Value(XFORM_ROOT_NODE_NAME_PATTERN),
                function='regexp_matches',
            )
        ).values_list('pk', 'root_node_name')
        self.stderr.write('Fetching XForm root node names...', ending='')
        t0 = time.time()
        xform_root_node_names = dict(xforms)
        seconds_taken = int(time.time() - t0)
        self.stderr.write(
            'got {} in {} seconds.'.format(
                len(xform_root_node_names), seconds_taken))

        completed_instances = 0
        changed_instances = 0
        failed_instances = 0
        progress_interval = 1  # second
        t0 = time.time()
        t_last = t0

        self.stdout.write(
            'Instance\tXForm\tOld Root Node Name\tNew Root Node Name')
        for instance in instances:
            t_now = time.time()
            report_due = t_now - t_last >= progress_interval
            if verbosity > 1 and report_due and completed_instances:
                t_last = t_now
                t_elapsed = t_now - t0
                rate = int(completed_instances / t_elapsed)
                write_same_line(
                    self.stderr,
                    'Completed {} Instances: {} changed, {} failed; '
                    '{}s elapsed, {} Instance/sec.'.format(
                        completed_instances, changed_instances,
                        failed_instances, int(t_elapsed), rate))

            # `regexp_matches` results come back as `list`s from the ORM
            instance_id, xform_id, instance_root_node_name = instance
            xform_root_node_name = xform_root_node_names[xform_id]

            # A usable result is a list holding exactly one root node name
            failure = None
            if len(instance_root_node_name) != 1:
                failure = (
                    '!!! Failed to get root node name for Instance {}'.format(
                        instance_id))
            elif len(xform_root_node_name) != 1:
                failure = (
                    '!!! Failed to get root node name for XForm {}'.format(
                        xform_id))
            if failure is not None:
                self.stderr.write(failure)
                failed_instances += 1
                completed_instances += 1
                continue

            instance_root_node_name = instance_root_node_name[0]
            xform_root_node_name = xform_root_node_name[0]
            if instance_root_node_name != xform_root_node_name:
                # Fetch only this row's XML, rename its root node and save
                # the result along with the matching hash
                queryset = Instance.objects.filter(
                    pk=instance_id).only('xml')
                fixed_xml = replace_first_and_last(queryset[0].xml,
                                                   instance_root_node_name,
                                                   xform_root_node_name)
                new_xml_hash = Instance.get_hash(fixed_xml)
                queryset.update(xml=fixed_xml, xml_hash=new_xml_hash)
                self.stdout.write('{}\t{}\t{}\t{}'.format(
                    instance_id, xform_id,
                    instance_root_node_name, xform_root_node_name))
                changed_instances += 1
            completed_instances += 1

        self.stderr.write(
            '\nFinished {} Instances: {} changed, {} failed.'.format(
                completed_instances, changed_instances, failed_instances))
        self.stdout.write('At the start of processing, the last instance PK '
                          'was {}.'.format(instance_id))
    def test_merged_datasets_data(self):
        """Test retrieving data of a merged dataset.

        Walks through three states of the merged dataset -- no submissions,
        one submission on the first form, and one submission on each of the
        first two forms -- and after every step checks the ``data`` action
        and the ``num_of_submissions`` reported by the merged dataset's
        ``retrieve`` action.  Finally the ``last_submission_time`` is
        compared against the most recently saved Instance on both the
        merged dataset and the XForm ``retrieve`` endpoints.
        """
        dataset_pk = self._create_merged_dataset()['id']
        request = self.factory.get('/', **self.extra)
        data_view = MergedXFormViewSet.as_view({'get': 'data'})
        merged_xform = MergedXForm.objects.get(pk=dataset_pk)
        retrieve_actions = {'get': 'retrieve'}
        merged_detail_view = MergedXFormViewSet.as_view(retrieve_actions)
        form_detail_view = XFormViewSet.as_view(retrieve_actions)

        def fetch(endpoint):
            """Call `endpoint` with the dataset pk and require HTTP 200."""
            result = endpoint(request, pk=dataset_pk)
            self.assertEqual(result.status_code, 200)
            return result

        # nothing has been submitted yet
        data_response = fetch(data_view)
        self.assertEqual(len(data_response.data), 0)
        detail_response = fetch(merged_detail_view)
        self.assertEqual(detail_response.data['num_of_submissions'], 0)

        # one submission on the first form of the merged dataset
        first_form = merged_xform.xforms.all()[0]
        Instance(
            xform=first_form,
            xml='<data id="a"><fruit>orange</fruit></data>').save()
        data_response = fetch(data_view)
        self.assertEqual(len(data_response.data), 1)
        self.assertEqual(
            [record['fruit'] for record in data_response.data], ['orange'])
        detail_response = fetch(merged_detail_view)
        self.assertEqual(detail_response.data['num_of_submissions'], 1)

        # a further submission, this time on the second form
        second_form = merged_xform.xforms.all()[1]
        last_submission = Instance(
            xform=second_form,
            xml='<data id="b"><fruit>mango</fruit></data>')
        last_submission.save()
        data_response = fetch(data_view)
        self.assertEqual(len(data_response.data), 2)
        self.assertEqual(
            [record['fruit'] for record in data_response.data],
            ['orange', 'mango'])

        # submission count and last submission time, first via
        # /merged-datasets/[pk] and then via /forms/[pk]
        for endpoint in (merged_detail_view, form_detail_view):
            detail_response = fetch(endpoint)
            self.assertEqual(detail_response.data['num_of_submissions'], 2)
            self.assertEqual(detail_response.data['last_submission_time'],
                             last_submission.date_created.isoformat())