def test_refresh_doc_meta(self):
        """Refreshing doc meta should map every raw indicator column except
        the campaign, indicator and unique columns."""
        self.set_up()
        refresher = MasterRefresh(self.user.id, self.document.id)

        submissions = SourceSubmission.objects.filter(
            document_id=self.document.id).values('id', 'submission_json')

        first_submission = json.loads(submissions[0]['submission_json'])
        raw_indicator_list = [key for key, val in first_submission.iteritems()]

        refresher.refresh_doc_meta()

        mapped_som_ids = SourceObjectMap.objects.filter(
            content_type='indicator',
            source_object_code__in=raw_indicator_list,
        ).values_list('id', flat=True)
        source_indicator_id_for_this_doc = DocumentSourceObjectMap.objects \
            .filter(id__in=mapped_som_ids)

        ## -3 because we dont want campaign_col,indicator_col and unique_col
        ## showing up as indicators to map!
        self.assertEqual(
            len(source_indicator_id_for_this_doc),
            len(raw_indicator_list) - 3)
Example #2
0
    def odk_form_data_to_datapoints(self):
        '''
        First translate the csv into source datapoints ( very similar to the
        csv upload process ), and once the sdps are created, call
        "refresh_master" and pass the list of sdps.

        Returns a human-readable summary of how many source datapoints and
        master datapoints were produced for this document.
        '''

        # lower-case the headers so downstream column lookups are
        # case-insensitive
        self.to_process_df.columns = map(str.lower, self.to_process_df)
        column_list = self.to_process_df.columns.tolist()

        #### DataFrame --> Source DP ####
        all_sdps = []
        for row_number, row in enumerate(self.to_process_df.values):

            row_dict = dict(zip(column_list, row))
            sdps = self.process_row(row_dict, row_number)
            all_sdps.extend(sdps)

        #### Source DP --> Master DP ####

        mr = MasterRefresh(self.user_id, self.document_id)
        dps = mr.source_dps_to_dps()

        ## FIX: message previously read "soure_datapoints" / "nows" and the
        ## string-literal line continuation embedded a run of stray spaces.
        response_string = (
            'created %s new source_datapoints by processing: %s '
            'rows for document_id: %s, yielding in: %s master datapoints'
            % (len(all_sdps), len(self.to_process_df),
               self.document_id, len(dps)))

        return response_string
Example #3
0
File: api.py Project: marks/polio
    def odk_refresh_master(self):
        '''
        A refresh master method that deals specifically with source datapoints
        from odk.  First we find the source_datapoint rows we need from the
        source ( odk ), then pass them along with the user to MasterRefresh.
        The system knows to only refresh datapoints newly created from ODK.

        Returns ``(error_or_None, success_msg_or_None)``.
        '''

        try:
            to_process_status = ProcessStatus.objects.get(
                status_text='TO_PROCESS')
            odk_source = Source.objects.get(source_name='odk')
            source_datapoints = SourceDataPoint.objects.filter(
                status_id=to_process_status,
                source_id=odk_source)

            m = MasterRefresh(source_datapoints, self.user_id)
            m.main()

            success_msg = 'SUCSSFULLY CREATED: ' + \
                str(len(m.new_datapoints)) + ' NEW DATPOINTS'
        except Exception:
            return format_exc(), None

        return None, success_msg
Example #4
0
    def test_refresh_doc_meta(self):
        """After refresh_doc_meta, every raw indicator column of the first
        submission should have a DocumentSourceObjectMap row."""
        self.set_up()
        refresher = MasterRefresh(self.user.id, self.document.id)

        ## load this w read_csv(self.test_file_location) remove DocTransform
        submission_rows = SourceSubmission.objects.filter(
            document_id=self.document.id).values('id', 'submission_json')

        submission_json = json.loads(submission_rows[0]['submission_json'])
        raw_indicator_list = [key for key, _ in submission_json.iteritems()]

        refresher.refresh_doc_meta()

        indicator_som_ids = SourceObjectMap.objects.filter(
            content_type='indicator',
            source_object_code__in=raw_indicator_list,
        ).values_list('id', flat=True)
        source_indicator_id_for_this_doc = DocumentSourceObjectMap.objects \
            .filter(id__in=indicator_som_ids)

        ## should be more specific here.. but this proves with a high degree ##
        ## of certainty that the source_object_map rows have been created ##
        self.assertEqual(
            len(source_indicator_id_for_this_doc), len(raw_indicator_list))
Example #5
0
File: views.py Project: marks/polio
def refresh_master_no_indicator(request, document_id):
    """Run the source-dp -> dp step for *document_id* with no indicator
    filter, then send the user back to the field mapping page."""
    refresher = MasterRefresh(
        document_id=document_id,
        indicator_id=None,
        user_id=request.user.id,
    )
    refresher.source_dps_to_dps()

    redirect_url = reverse(
        "source_data:field_mapping", kwargs={"document_id": document_id})
    return HttpResponseRedirect(redirect_url)
Example #6
0
    def test_refresh_doc_meta(self):
        """refresh_doc_meta should create doc source-object-map rows for all
        indicator columns except the campaign, indicator and unique columns."""
        self.set_up()
        mr = MasterRefresh(self.user.id, self.document.id)

        submission_values = SourceSubmission.objects.filter(
            document_id=self.document.id).values('id', 'submission_json')
        first_json = submission_values[0]['submission_json']
        raw_indicator_list = list(json.loads(first_json))

        mr.refresh_doc_meta()

        mapped_ids = SourceObjectMap.objects.filter(
            content_type='indicator',
            source_object_code__in=raw_indicator_list,
        ).values_list('id', flat=True)
        source_indicator_id_for_this_doc = DocumentSourceObjectMap.objects \
            .filter(id__in=mapped_ids)

        ## -3 because we dont want campaign_col,indicator_col and unique_col
        ## showing up as indicators to map!
        self.assertEqual(
            len(source_indicator_id_for_this_doc),
            len(raw_indicator_list) - 3)
Example #7
0
    def do(self):
        """Pick the first document with submissions waiting to process and
        run a full master refresh for it."""
        user_id = User.objects.get(username='******').id
        pending_doc_ids = SourceSubmission.objects \
            .filter(process_status='TO_PROCESS') \
            .values_list('document_id', flat=True)

        MasterRefresh(user_id, pending_doc_ids[0]).main()
Example #8
0
    def do(self):
        """Run MasterRefresh.main for the first document that still has
        submissions in the TO_PROCESS state."""
        user_id = User.objects.get(username='******').id
        document_id = SourceSubmission.objects.filter(
            process_status='TO_PROCESS',
        ).values_list('document_id', flat=True)[0]

        refresher = MasterRefresh(user_id, document_id)
        refresher.main()
Example #9
0
    def get_object_list(self, request):
        """Run a full master refresh for the requested document, upsert the
        document-level summary counts, and return them.

        Expects ``document_id`` in the query string.
        """
        document_id = request.GET['document_id']

        mr = MasterRefresh(request.user.id, document_id)
        mr.main()

        def upsert_doc_detail(type_name, value):
            # one DocumentDetail row per (document, detail type) pair;
            # factored out of three copy-pasted update_or_create calls whose
            # unused `doc_detail, created` bindings are also dropped
            DocumentDetail.objects.update_or_create(
                document_id=document_id,
                doc_detail_type_id=DocDetailType.objects.get(
                    name=type_name).id,
                defaults={'doc_detail_value': value},
            )

        upsert_doc_detail(
            'submission_processed_count',
            SourceSubmission.objects.filter(
                process_status='PROCESSED',
                document_id=document_id).count())

        upsert_doc_detail(
            'doc_datapoint_count',
            DocDataPoint.objects.filter(document_id=document_id).count())

        upsert_doc_detail(
            'datapoint_count',
            DataPoint.objects.filter(
                source_submission_id__in=SourceSubmission.objects.filter(
                    document_id=document_id).values_list(
                        'id', flat=True)).count())

        queryset = DocumentDetail.objects \
            .filter(document_id=document_id).values()

        return queryset
Example #10
0
    def test_submission_detail_refresh(self,):
        """Every source submission for the document should still be present
        after refresh_submission_details runs."""
        self.set_up()
        mr = MasterRefresh(self.user.id, self.document.id)

        submission_ids = SourceSubmission.objects.filter(
            document_id=self.document.id).values_list('id', flat=True)

        # = read_csv(self.test_file)
        ## fake the submission_data

        mr.refresh_submission_details()

        refreshed_rows = SourceSubmission.objects.filter(
            document_id=self.document.id)
        self.assertEqual(len(submission_ids), len(refreshed_rows))
Example #11
0
    def test_refresh_master_init(self):
        """MasterRefresh should construct cleanly and record the document id."""
        self.set_up()

        mr = MasterRefresh(self.user.id, self.document.id)

        ## FIX: was `self.assertTrue(isinstance,(mr,MasterRefresh))`, which
        ## only checked that the `isinstance` builtin is truthy (the tuple
        ## was swallowed as assertTrue's `msg` argument) -- it always passed.
        self.assertTrue(isinstance(mr, MasterRefresh))
        self.assertEqual(self.document.id, mr.document_id)
Example #12
0
    def test_mapping(self):
        '''
        Here we ensure that after mapping all of the meta data that we have the
        expected number of rows with the appropiate data associated.
        '''

        self.set_up()

        refresher = MasterRefresh(
            self.source_datapoints,
            self.user.id,
            self.document.id,
            self.indicator.id)

        ## create the source metadata ##
        create_source_meta_data(self.document.id)

        ## create mappings ( this is mimicking how bo would map metadata ) ##
        source_region_id = SourceRegion.objects.get(
            region_code=self.region_1_code).id
        RegionMap.objects.create(
            mapped_by_id=self.user.id,
            source_id=source_region_id,
            master_id=self.region_1.id)

        source_campaign_id = SourceCampaign.objects.get(
            campaign_string=self.campaign_string).id
        CampaignMap.objects.create(
            mapped_by_id=self.user.id,
            source_id=source_campaign_id,
            master_id=self.campaign.id)

        source_indicator_id = SourceIndicator.objects.get(
            indicator_string=self.indicator_string).id
        IndicatorMap.objects.create(
            mapped_by_id=self.user.id,
            source_id=source_indicator_id,
            master_id=self.indicator.id)

        refresher.source_dps_to_dps()

        matching_dps = DataPoint.objects.filter(
            region_id=self.region_1.id,
            campaign_id=self.campaign.id,
            indicator_id=self.indicator.id,
        )

        self.assertEqual(len(matching_dps), 1)
Example #13
0
    def get_object_list(self, request):
        """Sync a document's doc datapoints and datapoints, refresh the
        cache, upsert the document's summary counts, and return them.

        Expects ``document_id`` in the query string.
        """
        document_id = request.GET["document_id"]

        mr = MasterRefresh(request.user.id, document_id)
        mr.submissions_to_doc_datapoints()
        mr.sync_datapoint()

        cr = CacheRefresh()
        cr.main()

        ## upsert document meta from the last run ##

        ## fixme -> abstract this <- ##

        ## FIX: status was misspelled "PROCEESED" ( so the count was always
        ## zero ) and the count was not restricted to this document -- the
        ## sibling implementation of this method filters on document_id too.
        doc_detail, created = DocumentDetail.objects.update_or_create(
            document_id=document_id,
            doc_detail_type_id=DocDetailType.objects.get(name="submission_processed_count").id,
            defaults={"doc_detail_value": SourceSubmission.objects.filter(
                process_status="PROCESSED", document_id=document_id).count()},
        )

        doc_detail, created = DocumentDetail.objects.update_or_create(
            document_id=document_id,
            doc_detail_type_id=DocDetailType.objects.get(name="doc_datapoint_count").id,
            defaults={"doc_detail_value": DocDataPoint.objects.filter(document_id=document_id).count()},
        )

        doc_detail, created = DocumentDetail.objects.update_or_create(
            document_id=document_id,
            doc_detail_type_id=DocDetailType.objects.get(name="datapoint_count").id,
            defaults={
                "doc_detail_value": DataPoint.objects.filter(
                    source_submission_id__in=SourceSubmission.objects.filter(document_id=document_id).values_list(
                        "id", flat=True
                    )
                ).count()
            },
        )

        queryset = DocumentDetail.objects.filter(document_id=document_id).values()

        return queryset
Example #14
0
    def get_object_list(self, request):
        """Sync doc datapoints / datapoints for a document, refresh the
        cache, then upsert and return the document's summary counts.

        Expects ``document_id`` in the query string.
        """
        document_id = request.GET['document_id']

        mr = MasterRefresh(request.user.id, document_id)
        mr.submissions_to_doc_datapoints()
        mr.sync_datapoint()

        cr = CacheRefresh()
        cr.main()

        def upsert_doc_detail(type_name, value):
            # one DocumentDetail row per (document, detail type) pair;
            # factored out of three copy-pasted update_or_create calls whose
            # unused `doc_detail, created` bindings are also dropped
            DocumentDetail.objects.update_or_create(
                document_id=document_id,
                doc_detail_type_id=DocDetailType.objects.get(
                    name=type_name).id,
                defaults={'doc_detail_value': value},
            )

        upsert_doc_detail(
            'submission_processed_count',
            SourceSubmission.objects.filter(
                process_status='PROCESSED',
                document_id=document_id).count())

        upsert_doc_detail(
            'doc_datapoint_count',
            DocDataPoint.objects.filter(document_id=document_id).count())

        upsert_doc_detail(
            'datapoint_count',
            DataPoint.objects.filter(
                source_submission_id__in=SourceSubmission.objects.filter(
                    document_id=document_id).values_list(
                        'id', flat=True)).count())

        queryset = DocumentDetail.objects\
            .filter(document_id=document_id).values()

        return queryset
Example #15
0
    def refresh_master_for_document(self, list_of_object_ids):
        """Return ``(None, qset)`` where qset holds this document's row plus
        doc_datapoint / source_submission summary counts.

        NOTE(review): `list_of_object_ids` and `mr` are both unused here --
        presumably MasterRefresh construction was once followed by a call to
        its main(); confirm whether the instantiation can be removed.
        """

        mr = MasterRefresh(self.user_id,self.document_id)
        # Raw SQL: stage the document row in a temp table, then join on
        # three scalar subquery counts (cross joins via ON 1=1).
        qset = Document.objects.raw('''

            DROP TABLE IF EXISTS _this_doc;
            CREATE TEMP TABLE _this_doc AS
            SELECT
                sd.*
            FROM
            source_data_document sd
            WHERE id = %s;

            SELECT
                td.*
                ,x.doc_datapoint_cnt
				,y.source_submission_total_cnt
				,z.source_submission_to_process_cnt
            FROM _this_doc td
            INNER JOIN (
                SELECT COUNT(1) as doc_datapoint_cnt
                FROM doc_datapoint dd
                WHERE EXISTS (
                        SELECT 1 FROM _this_doc td
                        WHERE dd.document_id = td.id
                    )
                )x
            ON 1=1
            INNER JOIN (
                SELECT COUNT(1) as source_submission_total_cnt
                FROM source_submission ss
                WHERE EXISTS (
                        SELECT 1 FROM _this_doc td
                        WHERE ss.document_id = td.id
                    )
                )y
            ON 1=1
            INNER JOIN (
                SELECT COUNT(1) as source_submission_to_process_cnt
                FROM source_submission ss
                WHERE EXISTS (
                        SELECT 1 FROM _this_doc td
                        WHERE ss.document_id = td.id
                    )
                AND ss.process_status = 'TO_PROCESS'
                )z
            ON 1=1;

        ''',[self.document_id])

        return None, qset
Example #16
0
    def test_submission_detail_refresh(self, ):
        """Refreshing submission details should keep one row per source
        submission of the document."""
        self.set_up()
        refresher = MasterRefresh(self.user.id, self.document.id)
        refresher.refresh_doc_meta()

        ## FIXME replace source_submission_data with read_csv(self.test_file)
        expected_ids = SourceSubmission.objects.filter(
            document_id=self.document.id).values_list('id', flat=True)

        # = read_csv(self.test_file)
        ## fake the submission_data

        refresher.refresh_submission_details()

        detail_rows = SourceSubmission.objects.filter(
            document_id=self.document.id)
        self.assertEqual(len(expected_ids), len(detail_rows))
Example #17
0
    def test_source_data_points_to_doc_datapoints(self):
        """Processing the prepared source submissions should yield a
        non-empty collection of doc datapoint ids."""
        refresher = MasterRefresh(self.user.id, self.document.id)
        created_ids = refresher.process_doc_datapoints(self.source_submissions)

        self.assertTrue(created_ids)
Example #18
0
def refresh_master(request, document_id):
    """Kick off a full master refresh for *document_id*, then redirect the
    user to the document overview page."""
    MasterRefresh(request.user.id, document_id).main()

    overview_url = '/doc_review/overview/%s' % document_id
    return HttpResponseRedirect(overview_url)
Example #19
0
    def test_submission_to_datapoint(self):
        '''
        This simulates the following use case:

        As a user journey we can describe this test case as:
            - user uploads file ( see how set_up method calls DocTransform )
            - user maps metadata
            - user clicks " refresh master "
                -> user checks to see if data is correct
            - user realizes that the data is wrong, due to an invalid mapping
            - user re-mapps the data and clicks " refresh master"
                -> data from old mapping should be deleted and associated to
                   the newly mapped value

        TEST CASES:
            1. WHen the submission detail is refreshed, the location/campaign ids
               that we mapped should exist in that row.
            2. DocDataPoint records are created if the necessary mapping exists
            3. There are no zero or null values allowed in doc_datapoint
            4. The doc_datapoint from #3 is merged into datpaoint.
            5. I create mappings, sync data, realize the mapping was incorrect,
               re-map the metadata and the old data should be deleted, the new
               data created.
                 -> was the old data deleted?
                 -> was the new data created?
        '''

        self.set_up()

        # first ( id, submission_json ) pair for this document
        submission_qs = SourceSubmission.objects\
            .filter(document_id = self.document.id)\
            .values_list('id','submission_json')[0]

        ss_id, first_submission = submission_qs[0],json.loads(submission_qs[1])

        location_code = first_submission[self.location_code_input_column]
        campaign_code = first_submission[self.campaign_code_input_column]
        # raw column names of the submission; the last one is used as the
        # indicator column below
        raw_indicator_list = [k for k,v in first_submission.iteritems()]

        indicator_code = raw_indicator_list[-1]

        ## SIMULATED USER MAPPING ##
        ## see: datapoints/source-data/Nigeria/2015/06/mapping/2

        ## choose meta data values for the source_map update ##
        map_location_id = Location.objects.all()[0].id
        map_campaign_id = Campaign.objects.all()[0].id
        first_indicator_id = Indicator.objects.all()[0].id

        ## map location ##
        som_id_l = SourceObjectMap.objects.get(
            content_type = 'location',
            source_object_code = location_code,
        )
        som_id_l.master_object_id = map_location_id
        som_id_l.save()

        ## map campaign ##
        som_id_c = SourceObjectMap.objects.get(
            content_type = 'campaign',
            source_object_code = campaign_code,
        )
        som_id_c.master_object_id = map_campaign_id
        som_id_c.save()

        ## map indicator ##
        som_id_i = SourceObjectMap.objects.get(
            content_type = 'indicator',
            source_object_code = indicator_code,
        )
        som_id_i.master_object_id = first_indicator_id
        som_id_i.save()

        mr_with_new_meta = MasterRefresh(self.user.id ,self.document.id)
        mr_with_new_meta.refresh_submission_details()

        first_submission_detail = SourceSubmission.objects\
            .get(id = ss_id)

        ## Test Case 2 ##
        # NOTE(review): inline labels here are offset by one from the
        # docstring's TEST CASES numbering (this assertion matches case 1).
        self.assertEqual(first_submission_detail.location_id, map_location_id)
        self.assertEqual(first_submission_detail.campaign_id, map_campaign_id)

        ## now that we have created the mappign, "refresh_master" ##
        ##         should create the relevant datapoints          ##

        mr_with_new_meta.submissions_to_doc_datapoints()
        doc_dp_ids = DocDataPoint.objects.filter(document_id =
            self.document.id)

        ## Test Case #3
        self.assertEqual(1,len(doc_dp_ids))

        mr_with_new_meta.sync_datapoint()
        dps = DataPoint.objects.all()

        ## Test Case #4
        self.assertEqual(1,len(dps))

        ## Test Case #5

        ## update the mapping with a new indicator value ##
        new_indicator_id = Indicator.objects.all()[1].id
        som_id_i.master_object_id = new_indicator_id
        som_id_i.save()

        # re-running the full pipeline should re-associate the datapoint to
        # the newly mapped indicator and remove the old one
        mr_after_new_mapping = MasterRefresh(self.user.id ,self.document.id)
        mr_after_new_mapping.main()

        dp_with_new_indicator = DataPoint.objects.filter(indicator_id = \
            new_indicator_id)

        dp_with_old_indicator = DataPoint.objects.filter(indicator_id = \
            first_indicator_id)

        ## did new indicator flow through the system ?##
        self.assertEqual(1,len(dp_with_new_indicator))

        ## did the old indicator data get deleted?
        self.assertEqual(0,len(dp_with_old_indicator))
Example #20
0
    def test_submission_to_datapoint(self):
        '''
        This simulates a new document being processed with source_object_map
        records that have been successfully mapped to master_ids but from a
        different document.

        A few things this method checks:
            1. there is a record in doc_object_map even for
               mappings that existed before this document was ingested.
            2. When the submission detail is refreshed, the location/campaign
               ids that we mapped should exist in that row.
            3. DocDataPoint records are created if the necessary mapping exists
            4. There are no zero or null values allowed in doc_datapoint
            5. The doc_datapoint from #4 is merged into datapoint.
        '''

        self.set_up()
        mr = MasterRefresh(self.user.id, self.document.id)

        # first ( id, submission_json ) pair for this document
        submission_qs = SourceSubmission.objects\
            .filter(document_id = self.document.id)\
            .values_list('id','submission_json')[0]

        ss_id, first_submission = submission_qs[0], json.loads(
            submission_qs[1])

        location_code = first_submission[self.location_code_input_column]
        campaign_code = first_submission[self.campaign_code_input_column]
        raw_indicator_list = [k for k, v in first_submission.iteritems()]

        indicator_code = raw_indicator_list[-1]

        # mappings created *before* refresh_doc_meta runs, simulating
        # mappings inherited from a previously ingested document
        map_location_id = Location.objects.all()[0].id
        som_id_r = SourceObjectMap.objects.create(
            content_type='location',
            source_object_code=location_code,
            master_object_id=map_location_id,
            mapped_by_id=self.user.id)

        map_campaign_id = Campaign.objects.all()[0].id
        som_id_c = SourceObjectMap.objects.create(
            content_type='campaign',
            source_object_code=campaign_code,
            master_object_id=map_campaign_id,
            mapped_by_id=self.user.id)

        map_indicator_id = Indicator.objects.all()[0].id
        som_id_i = SourceObjectMap.objects.create(
            content_type='indicator',
            source_object_code=indicator_code,
            master_object_id=map_indicator_id,
            mapped_by_id=self.user.id)

        mr.refresh_doc_meta()

        doc_som_id_for_location_code = DocumentSourceObjectMap.objects\
            .filter(id__in= SourceObjectMap.objects.filter(\
                    content_type = 'location',
                    source_object_code = location_code,
                ).values_list('id',flat=True)
            ).values_list('id',flat=True)

        ## Test Case 1 ##
        self.assertEqual(len(doc_som_id_for_location_code), 1)  # 1

        mr_with_new_meta = MasterRefresh(self.user.id, self.document.id)
        mr_with_new_meta.refresh_submission_details()

        first_submission_detail = SourceSubmission.objects\
            .get(id = ss_id)

        ## Test Case 2 ##
        self.assertEqual(first_submission_detail.location_id, map_location_id)
        self.assertEqual(first_submission_detail.campaign_id, map_campaign_id)

        mr_with_new_meta.submissions_to_doc_datapoints()
        doc_dp_ids = DocDataPoint.objects.filter(document_id=self.document.id)

        ## Test Case #3
        self.assertEqual(1, len(doc_dp_ids))

        mr_with_new_meta.sync_datapoint()
        dps = DataPoint.objects.all()

        ## Test Case #4
        self.assertEqual(1, len(dps))
Example #21
0
def refresh_master(request, document_id):
    """Run the full master refresh pipeline for *document_id*, then
    redirect back to the document overview."""
    refresher = MasterRefresh(request.user.id, document_id)
    refresher.main()

    return HttpResponseRedirect('/doc_review/overview/%s' % document_id)
    def test_submission_to_datapoint(self):
        '''
        This simulates a new document being processed with source_object_map
        records that have been successfully mapped to master_ids but from a
        different document.

        A few things this method checks:
            1. there is a record in doc_object_map even for
               mappings that existed before this document was ingested.
            2. When the submission detail is refreshed, the location/campaign
               ids that we mapped should exist in that row.
            3. DocDataPoint records are created if the necessary mapping exists
            4. The doc_datapoints are merged into datapoint.
        '''

        self.set_up()
        mr = MasterRefresh(self.user.id, self.document.id)

        # first ( id, submission_json ) pair for this document
        submission_qs = SourceSubmission.objects\
            .filter(document_id = self.document.id)\
            .values_list('id','submission_json')[0]

        ss_id, first_submission = submission_qs[0], json.loads(submission_qs[1])

        location_code = first_submission[self.location_code_input_column]
        campaign_code = first_submission[self.campaign_code_input_column]
        raw_indicator_list = [k for k, v in first_submission.iteritems()]

        indicator_code = raw_indicator_list[-1]

        ## mappings created before this document is ingested ##
        map_location_id = Location.objects.all()[0].id
        som_id_r = SourceObjectMap.objects.create(
            content_type = 'location',
            source_object_code = location_code,
            master_object_id = map_location_id,
            mapped_by_id = self.user.id
        )

        map_campaign_id = Campaign.objects.all()[0].id
        som_id_c = SourceObjectMap.objects.create(
            content_type = 'campaign',
            source_object_code = campaign_code,
            master_object_id = map_campaign_id,
            mapped_by_id = self.user.id
        )

        map_indicator_id = Indicator.objects.all()[0].id
        som_id_i = SourceObjectMap.objects.create(
            content_type = 'indicator',
            source_object_code = indicator_code,
            master_object_id = map_indicator_id,
            mapped_by_id = self.user.id
        )

        mr.refresh_doc_meta()

        doc_som_id_for_location_code = DocumentSourceObjectMap.objects\
            .filter(id__in= SourceObjectMap.objects.filter(\
                    content_type = 'location',
                    source_object_code = location_code,
                ).values_list('id',flat=True)
            ).values_list('id',flat=True)

        ## Test Case 1 ##
        self.assertEqual(len(doc_som_id_for_location_code),1) # 1

        ## FIX: was `mr_with_new_meta = mr = MasterRefresh(...)`, silently
        ## rebinding `mr` too, and never ran refresh_submission_details()
        ## before reading the detail row ( the sibling test in this file
        ## does -- the detail is produced by that step ).
        mr_with_new_meta = MasterRefresh(self.user.id, self.document.id)
        mr_with_new_meta.refresh_submission_details()
        first_submission_detail = SourceSubmissionDetail.objects\
            .get(source_submission_id = ss_id)

        ## Test Case 2 ##
        self.assertEqual(first_submission_detail.location_id, map_location_id)
        self.assertEqual(first_submission_detail.campaign_id, map_campaign_id)

        mr_with_new_meta.submissions_to_doc_datapoints()
        doc_dp_ids = DocDataPoint.objects.filter(document_id =
            self.document.id)

        ## Test Case #3
        self.assertEqual(1,len(doc_dp_ids))

        mr_with_new_meta.sync_datapoint()
        dps = DataPoint.objects.all()

        ## Test Case #4
        ## FIX: previously re-asserted len(doc_dp_ids) here, so the merged
        ## datapoint count was never actually checked.
        self.assertEqual(1,len(dps))