def main(self):
    '''
    from source_data.etl_tasks.refresh_master import MasterRefresh as mr
    x = mr(1,3)
    '''
    BATCH_SIZE = 25

    # find unprocessed submissions for this document and take one batch
    new_source_submission_ids = SourceSubmission.objects.filter(
        document_id=self.document_id,
        process_status='TO_PROCESS'
    ).values_list('id', flat=True)

    to_process = new_source_submission_ids[:BATCH_SIZE]

    print '=== mapping ==='
    source_object_map_ids = self.upsert_source_object_map(to_process)

    print '== source_submissions =='
    SourceSubmission.objects.filter(id__in=to_process)\
        .update(process_status='PROCESSED')

    print '== doc datapoints =='
    doc_datapoint_ids = self.process_doc_datapoints(to_process)

    print '== sync datapoints =='
    datapoint_ids = self.sync_doc_datapoint()

    print '== cache datapoints =='
    cr = CacheRefresh([d.id for d in datapoint_ids])
    computed_datapoint_ids = cr.main()

    return datapoint_ids
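# A minimal sketch (not part of the module above) of how a caller might
# drain the TO_PROCESS queue, since main() only handles BATCH_SIZE
# submissions per call. ``run_full_refresh`` is a hypothetical helper; it
# assumes main() returns an empty list once nothing is left to process.
def run_full_refresh(user_id, document_id):
    mr = MasterRefresh(user_id, document_id)
    while True:
        synced = mr.main()
        if not synced:  # no submissions left in TO_PROCESS
            break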
def test_agg(self):
    '''
    First refresh the new datapoints, then refresh the cache for only one
    datapoint_id, and make sure the aggregation uses all child data below,
    even when that data is from a different job.

    To Do - After the first cache refresh, update the value and make sure
    that the aggregated total works.  Note - this will need either
    ``transaction.atomic`` or ``TransactionTestCase`` in order to persist
    multiple DB changes within one test.
    '''
    raw_indicator_id, campaign_id, raw_region_id, agg_region_id = \
        22, 111, 12939, 12907

    self.set_up()
    self.create_raw_datapoints()

    agg_value_target = self.test_df[self.test_df['indicator_id'] ==
        raw_indicator_id]['value'].sum()

    dp_id_to_refresh = DataPoint.objects.filter(
        region_id=raw_region_id,
        campaign_id=campaign_id,
        indicator_id=raw_indicator_id).values_list('id', flat=True)

    cr = CacheRefresh()
    ## now just try for one id (see POLIO-491) ##
    cr = CacheRefresh(datapoint_id_list=list(dp_id_to_refresh))

    actual_value = self.get_dwc_value(agg_region_id, campaign_id,
        raw_indicator_id)

    self.assertEqual(actual_value, agg_value_target)
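# The docstring's TODO -- persisting an update between two cache refreshes --
# would need Django's TransactionTestCase, since the default TestCase wraps
# each test in a single rolled-back transaction. The skeleton below is an
# illustrative sketch, not an existing test; it assumes the same helpers
# (set_up, create_raw_datapoints, get_dwc_value) used by the test above.
from django.test import TransactionTestCase

class CacheRefreshUpdateTest(TransactionTestCase):

    def test_agg_after_update(self):
        self.set_up()
        self.create_raw_datapoints()
        CacheRefresh()  # initial full refresh

        # bump one raw value; the write commits because this is a
        # TransactionTestCase
        dp = DataPoint.objects.get(region_id=12939, campaign_id=111,
            indicator_id=22)
        dp.value += 1
        dp.save()

        # re-run the cache for just the touched id and assert the
        # aggregated total reflects the new value
        CacheRefresh(datapoint_id_list=[dp.id])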
def get_object_list(self, request):
    cr = CacheRefresh()
    cr.main()

    queryset = DocumentDetail.objects\
        .filter(document_id=1).values()

    return queryset
def get_object_list(self, request):
    cr = CacheRefresh()

    queryset = DocumentDetail.objects \
        .filter(document_id=1).values()

    return queryset
def get_object_list(self, request):
    document_id = request.GET["document_id"]

    mr = MasterRefresh(request.user.id, document_id)
    mr.submissions_to_doc_datapoints()
    mr.sync_datapoint()

    cr = CacheRefresh()
    cr.main()

    ## upsert document meta from the last run ##
    ## fixme -> abstract this <- ##
    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id=document_id,
        doc_detail_type_id=DocDetailType.objects.get(
            name="submission_processed_count").id,
        defaults={"doc_detail_value": SourceSubmission.objects.filter(
            process_status="PROCESSED",
            document_id=document_id).count()},
    )

    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id=document_id,
        doc_detail_type_id=DocDetailType.objects.get(
            name="doc_datapoint_count").id,
        defaults={"doc_detail_value": DocDataPoint.objects.filter(
            document_id=document_id).count()},
    )

    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id=document_id,
        doc_detail_type_id=DocDetailType.objects.get(
            name="datapoint_count").id,
        defaults={
            "doc_detail_value": DataPoint.objects.filter(
                source_submission_id__in=SourceSubmission.objects.filter(
                    document_id=document_id).values_list("id", flat=True)
            ).count()
        },
    )

    queryset = DocumentDetail.objects.filter(document_id=document_id).values()

    return queryset
def get_object_list(self, request):
    document_id = request.GET['document_id']

    mr = MasterRefresh(request.user.id, document_id)
    mr.submissions_to_doc_datapoints()
    mr.sync_datapoint()

    cr = CacheRefresh()
    cr.main()

    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id=document_id,
        doc_detail_type_id=DocDetailType.objects.get(
            name='submission_processed_count').id,
        defaults={'doc_detail_value': SourceSubmission.objects
            .filter(process_status='PROCESSED',
                document_id=document_id).count()},
    )

    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id=document_id,
        doc_detail_type_id=DocDetailType.objects.get(
            name='doc_datapoint_count').id,
        defaults={'doc_detail_value': DocDataPoint.objects
            .filter(document_id=document_id).count()},
    )

    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id=document_id,
        doc_detail_type_id=DocDetailType.objects.get(
            name='datapoint_count').id,
        defaults={'doc_detail_value': DataPoint.objects
            .filter(source_submission_id__in=SourceSubmission.objects
                .filter(document_id=document_id)
                .values_list('id', flat=True)).count()},
    )

    queryset = DocumentDetail.objects\
        .filter(document_id=document_id).values()

    return queryset
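# One way to resolve the ``fixme -> abstract this`` note above: fold the
# three near-identical update_or_create calls into a single helper.
# ``_upsert_doc_detail`` is hypothetical (it does not exist in the
# codebase); this is a sketch of the abstraction the comment asks for.
def _upsert_doc_detail(document_id, type_name, value):
    return DocumentDetail.objects.update_or_create(
        document_id=document_id,
        doc_detail_type_id=DocDetailType.objects.get(name=type_name).id,
        defaults={'doc_detail_value': value},
    )

# usage, mirroring the counts computed in get_object_list above:
# _upsert_doc_detail(document_id, 'submission_processed_count',
#     SourceSubmission.objects.filter(process_status='PROCESSED',
#         document_id=document_id).count())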
def test_basic(self):
    '''
    Using calc_data.csv, create a test_df and target_df.  Ensure that the
    aggregation and calculation are working properly by ingesting the
    stored data, running the cache refresh, and checking that the
    calculated data for the aggregate region (the parent region, in this
    case Nigeria) is as expected.
    '''
    self.set_up()
    self.create_raw_datapoints()

    cr = CacheRefresh()

    for ix, row in self.target_df.iterrows():
        region_id, campaign_id, indicator_id = int(row.region_id),\
            int(row.campaign_id), int(row.indicator_id)

        actual_value = self.get_dwc_value(region_id, campaign_id,\
            indicator_id)

        self.assertEqual(row.value, actual_value)
def do(self):
    cr = CacheRefresh()
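# ``do(self)`` matches django-cron's CronJobBase hook, so this is presumably
# a scheduled job that triggers a full cache refresh when CacheRefresh is
# instantiated. A sketch of the surrounding boilerplate, assuming
# django-cron; the interval and ``code`` identifier are placeholders, not
# values from the codebase.
from django_cron import CronJobBase, Schedule

class CacheRefreshCronJob(CronJobBase):
    RUN_EVERY_MINS = 30  # placeholder interval
    schedule = Schedule(run_every_mins=RUN_EVERY_MINS)
    code = 'datapoints.cache_refresh'  # hypothetical unique job id

    def do(self):
        cr = CacheRefresh()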