Example #1
0
    def main(self):
        '''
        Process one batch of this document's pending source submissions.

        Takes up to BATCH_SIZE submissions of ``self.document_id`` whose
        process_status is 'TO_PROCESS', upserts their source object map,
        marks them 'PROCESSED', builds their doc datapoints, syncs the
        datapoints and finally runs CacheRefresh over the synced datapoints.

        Returns whatever ``self.sync_doc_datapoint()`` returned.

        Example usage:

            from source_data.etl_tasks.refresh_master import MasterRefresh as mr
            x = mr(1,3)
        '''

        BATCH_SIZE = 25

        new_source_submission_ids = SourceSubmission.objects.filter(
            document_id=self.document_id,
            process_status='TO_PROCESS',
        ).values_list('id', flat=True)

        # Materialize the batch right away.  A sliced queryset is lazy: if it
        # were evaluated (or embedded as a subquery) after the status update
        # below, these rows would no longer match 'TO_PROCESS' and a different
        # batch - or nothing - would be selected.
        to_process = list(new_source_submission_ids[:BATCH_SIZE])

        print('=== mapping === ')
        self.upsert_source_object_map(to_process)

        print('== source_submissions ==')
        SourceSubmission.objects.filter(id__in=to_process)\
            .update(process_status='PROCESSED')

        print('== doc datapoints ==')
        self.process_doc_datapoints(to_process)

        print('== sync datapoints ==')
        datapoint_ids = self.sync_doc_datapoint()

        print('== cache datapoints ==')
        # Despite the name, the items are objects exposing ``.id``.
        cr = CacheRefresh([d.id for d in datapoint_ids])
        cr.main()

        return datapoint_ids
Example #2
0
    def test_agg(self):
        '''
        First refresh the new datapoints and then only refresh the cache for
        one datapoint_id and make sure agg uses all child data below even when
        that data is from a different job

        To Do - After the first cache_refresh, update the value and make sure
        that the aggregated total works.  Note - will need to use either
        ``transaction.atomic`` or ``TransactionTestCase`` in order to persist
        multiple DB changes within one test
        '''
        raw_indicator_id, campaign_id, raw_region_id, agg_region_id = \
            22, 111, 12939, 12907

        self.set_up()
        self.create_raw_datapoints()

        # Expected aggregate: sum of every raw value for the indicator.
        # Only the local is bound here; assigning the sum back onto
        # ``self.test_df`` would replace the fixture DataFrame with a scalar.
        indicator_rows = self.test_df[
            self.test_df['indicator_id'] == raw_indicator_id]
        agg_value_target = indicator_rows['value'].sum()

        dp_id_to_refresh = DataPoint.objects.filter(
            region_id=raw_region_id,
            campaign_id=campaign_id,
            indicator_id=raw_indicator_id).values_list('id', flat=True)

        # NOTE(review): main() is never called, so constructing CacheRefresh
        # presumably performs the refresh itself - confirm against the class.
        CacheRefresh()

        ## now just try for one id (see POLIO-491 )
        CacheRefresh(datapoint_id_list=list(dp_id_to_refresh))

        actual_value = self.get_dwc_value(
            agg_region_id, campaign_id, raw_indicator_id)

        self.assertEqual(actual_value, agg_value_target)
Example #3
0
    def get_object_list(self, request):
        '''
        Run a full cache refresh, then return the DocumentDetail rows of
        document 1 as a ``values()`` queryset.  ``request`` is not used.
        '''
        CacheRefresh().main()

        details_for_doc = DocumentDetail.objects.filter(document_id=1)
        return details_for_doc.values()
Example #4
0
    def get_object_list(self, request):
        '''
        Instantiate CacheRefresh (``main()`` is not called here), then return
        the DocumentDetail rows of document 1 as a ``values()`` queryset.
        ``request`` is not used.
        '''
        CacheRefresh()

        details_for_doc = DocumentDetail.objects.filter(document_id=1)
        return details_for_doc.values()
Example #5
0
    def get_object_list(self, request):
        """
        Refresh the document named by the ``document_id`` GET parameter and
        return its DocumentDetail rows as a ``values()`` queryset.

        Steps: run MasterRefresh (submissions -> doc datapoints -> datapoints)
        for the requesting user and document, run a cache refresh, then upsert
        three per-document counters into DocumentDetail:

        * ``submission_processed_count`` - the document's source submissions
          whose process_status is 'PROCESSED'
        * ``doc_datapoint_count`` - DocDataPoint rows of the document
        * ``datapoint_count`` - DataPoint rows linked to the document's
          source submissions

        Raises whatever ``request.GET[...]`` raises when ``document_id`` is
        missing, and ``DocDetailType.DoesNotExist`` when a counter type is
        not defined.
        """
        document_id = request.GET["document_id"]

        mr = MasterRefresh(request.user.id, document_id)
        mr.submissions_to_doc_datapoints()
        mr.sync_datapoint()

        cr = CacheRefresh()
        cr.main()

        ## upsert document meta from the last run ##

        doc_submissions = SourceSubmission.objects.filter(document_id=document_id)

        # (detail type name, queryset to count).  The querysets stay lazy, so
        # each count is taken right before its own upsert.
        detail_sources = (
            # Status literal must match the 'PROCESSED' value written when a
            # submission is handled, and the count is scoped to this document
            # because the result is stored under its document_id.
            ("submission_processed_count", doc_submissions.filter(process_status="PROCESSED")),
            ("doc_datapoint_count", DocDataPoint.objects.filter(document_id=document_id)),
            (
                "datapoint_count",
                DataPoint.objects.filter(source_submission_id__in=doc_submissions.values_list("id", flat=True)),
            ),
        )

        for detail_name, rows in detail_sources:
            DocumentDetail.objects.update_or_create(
                document_id=document_id,
                doc_detail_type_id=DocDetailType.objects.get(name=detail_name).id,
                defaults={"doc_detail_value": rows.count()},
            )

        queryset = DocumentDetail.objects.filter(document_id=document_id).values()

        return queryset
Example #6
0
    def get_object_list(self,request):
        '''
        Refresh the document named by the ``document_id`` GET parameter, store
        three per-document counters in DocumentDetail and return that
        document's DocumentDetail rows as a ``values()`` queryset.

        The counters are the document's 'PROCESSED' source submissions, its
        DocDataPoint rows, and the DataPoint rows linked to its source
        submissions.
        '''
        document_id = request.GET['document_id']

        refresher = MasterRefresh(request.user.id, document_id)
        refresher.submissions_to_doc_datapoints()
        refresher.sync_datapoint()

        CacheRefresh().main()

        # -- processed submissions of this document --
        processed_type_id = DocDetailType.objects.get(
            name='submission_processed_count').id
        processed_total = SourceSubmission.objects.filter(
            process_status='PROCESSED', document_id=document_id).count()
        DocumentDetail.objects.update_or_create(
            document_id=document_id,
            doc_detail_type_id=processed_type_id,
            defaults={'doc_detail_value': processed_total},
        )

        # -- doc datapoints of this document --
        doc_dp_type_id = DocDetailType.objects.get(
            name='doc_datapoint_count').id
        doc_dp_total = DocDataPoint.objects.filter(
            document_id=document_id).count()
        DocumentDetail.objects.update_or_create(
            document_id=document_id,
            doc_detail_type_id=doc_dp_type_id,
            defaults={'doc_detail_value': doc_dp_total},
        )

        # -- datapoints linked to this document's submissions --
        dp_type_id = DocDetailType.objects.get(name='datapoint_count').id
        submission_ids = SourceSubmission.objects.filter(
            document_id=document_id).values_list('id', flat=True)
        dp_total = DataPoint.objects.filter(
            source_submission_id__in=submission_ids).count()
        DocumentDetail.objects.update_or_create(
            document_id=document_id,
            doc_detail_type_id=dp_type_id,
            defaults={'doc_detail_value': dp_total},
        )

        return DocumentDetail.objects.filter(document_id=document_id).values()
Example #7
0
    def test_basic(self):
        '''
        Using the calc_data.csv, create a test_df and target_df.  Ensure that
        the aggregation and calculation are working properly by ingesting the
        stored data, running the cache, and checking that the calculated data
        for the aggregate region (parent region, in this case Nigeria) is as
        expected.
        '''
        self.set_up()
        self.create_raw_datapoints()

        # The instance is not needed afterwards; only its construction matters.
        CacheRefresh()

        # Every target row is one (region, campaign, indicator) expectation.
        for _, expected in self.target_df.iterrows():
            region_id = int(expected.region_id)
            campaign_id = int(expected.campaign_id)
            indicator_id = int(expected.indicator_id)

            actual_value = self.get_dwc_value(
                region_id, campaign_id, indicator_id)

            self.assertEqual(expected.value, actual_value)
Example #8
0
 def do(self):
     '''Instantiate CacheRefresh; the instance itself is discarded.'''
     CacheRefresh()