def test_refresh_doc_meta(self):
    """Ensure refresh_doc_meta creates a document-scoped mapping row for
    every indicator column of the uploaded document.

    The raw submission keys include the campaign, indicator and unique-id
    columns, which must NOT be mapped as indicators -- hence the -3 in
    the final assertion.
    """
    self.set_up()
    mr = MasterRefresh(self.user.id ,self.document.id)

    # raw submission rows created for this document during set_up
    source_submissions_data = SourceSubmission.objects\
        .filter(document_id = self.document.id)\
        .values('id','submission_json')

    # keys of the first submission's JSON == every column of the source file
    raw_indicator_list = [k for k,v in json\
        .loads(source_submissions_data[0]['submission_json']).iteritems()]

    mr.refresh_doc_meta()

    # mapping rows tied to this document, restricted to indicator codes
    # that actually came from the raw submission
    source_indicator_id_for_this_doc = DocumentSourceObjectMap.objects\
        .filter(id__in= SourceObjectMap.objects.filter(\
            content_type = 'indicator',
            source_object_code__in = raw_indicator_list
        ).values_list('id',flat=True)
    )

    ## -3 because we dont want campaign_col,indicator_col and unique_col
    ## showing up as indicators to map!
    self.assertEqual(len(source_indicator_id_for_this_doc)\
        ,len(raw_indicator_list) -3 )
def odk_form_data_to_datapoints(self):
    '''
    Translate the ODK form dataframe into source datapoints ( very
    similar to the csv upload process ), then run the master refresh so
    the new source datapoints are merged into master datapoints.

    Returns a human-readable summary of how many source datapoints and
    master datapoints were created for this document.
    '''
    # normalize headers so downstream column lookups are case-insensitive
    self.to_process_df.columns = map(str.lower, self.to_process_df)
    column_list = self.to_process_df.columns.tolist()

    #### DataFrame --> Source DP ####
    all_sdps = []
    for row_number, row in enumerate(self.to_process_df.values):
        row_dict = dict(zip(column_list, row))
        all_sdps.extend(self.process_row(row_dict, row_number))

    #### Source DP --> Master DP ####
    mr = MasterRefresh(self.user_id, self.document_id)
    dps = mr.source_dps_to_dps()

    # FIX: message previously read "soure_datapoints" and "nows" -- a
    # typo plus a stray line continuation inside the string literal.
    response_string = ('created %s new source_datapoints by processing: '
        '%s rows for document_id: %s, yielding: %s master datapoints') % \
        (len(all_sdps), len(self.to_process_df), self.document_id, len(dps))

    return response_string
def odk_refresh_master(self):
    '''
    A refresh master method that deals specifically with source
    datapoints from odk.

    First we find the source_datapoint rows still flagged TO_PROCESS for
    the 'odk' source, then hand them (with the user) to MasterRefresh,
    which knows to only refresh datapoints newly created from ODK.

    Returns an (error, success_msg) pair: (traceback_string, None) on
    failure, (None, summary_string) on success.
    '''
    try:
        source_datapoints = SourceDataPoint.objects.filter(
            status_id = ProcessStatus.objects.get(status_text='TO_PROCESS'),
            source_id = Source.objects.get(source_name='odk'))
        m = MasterRefresh(source_datapoints,self.user_id)
        m.main()
        dp_count = len(m.new_datapoints)
        # FIX: message previously read "SUCSSFULLY ... DATPOINTS"
        success_msg = 'SUCCESSFULLY CREATED: ' + str(dp_count) +\
            ' NEW DATAPOINTS'
    except Exception:
        # broad catch is deliberate: this is a task-level boundary and
        # the caller expects the traceback back rather than a raise
        err = format_exc()
        return err, None
    return None, success_msg
def test_refresh_doc_meta(self):
    """refresh_doc_meta should create one document-scoped mapping row
    per raw indicator column found in the submission json."""
    self.set_up()
    master_refresh = MasterRefresh(self.user.id, self.document.id)

    ## load this w read_csv(self.test_file_location) remove DocTransform
    first_submission = SourceSubmission.objects \
        .filter(document_id=self.document.id) \
        .values('id', 'submission_json')[0]
    submission_keys = [key for key, _ in
        json.loads(first_submission['submission_json']).iteritems()]

    master_refresh.refresh_doc_meta()

    indicator_som_ids = SourceObjectMap.objects \
        .filter(content_type='indicator',
                source_object_code__in=submission_keys) \
        .values_list('id', flat=True)
    doc_mappings = DocumentSourceObjectMap.objects \
        .filter(id__in=indicator_som_ids)

    ## should be more specific here.. but this proves with a high degree ##
    ## of certainty that the source_object_map rows have been created ##
    self.assertEqual(len(doc_mappings), len(submission_keys))
def refresh_master_no_indicator(request, document_id):
    """Run the source -> master datapoint sync for one document (no
    specific indicator), then send the user back to field mapping."""
    refresher = MasterRefresh(document_id=document_id,
                              indicator_id=None,
                              user_id=request.user.id)
    refresher.source_dps_to_dps()
    mapping_url = reverse("source_data:field_mapping",
                          kwargs={"document_id": document_id})
    return HttpResponseRedirect(mapping_url)
def test_refresh_doc_meta(self):
    """After refresh_doc_meta, every true indicator column of the
    uploaded document should have a mapping row tied to this doc."""
    self.set_up()
    refresher = MasterRefresh(self.user.id, self.document.id)

    submission_rows = SourceSubmission.objects \
        .filter(document_id=self.document.id) \
        .values('id', 'submission_json')
    submission_json = json.loads(submission_rows[0]['submission_json'])
    raw_columns = [key for key, _ in submission_json.iteritems()]

    refresher.refresh_doc_meta()

    mapped_som_ids = SourceObjectMap.objects.filter(
        content_type='indicator',
        source_object_code__in=raw_columns,
    ).values_list('id', flat=True)
    doc_mappings = DocumentSourceObjectMap.objects \
        .filter(id__in=mapped_som_ids)

    ## -3 because we dont want campaign_col,indicator_col and unique_col
    ## showing up as indicators to map!
    self.assertEqual(len(doc_mappings), len(raw_columns) - 3)
def do(self):
    """Kick off a full MasterRefresh for the first document that still
    has submissions waiting to be processed."""
    pending_doc_ids = SourceSubmission.objects \
        .filter(process_status='TO_PROCESS') \
        .values_list('document_id', flat=True)
    refresh_user_id = User.objects.get(username='******').id
    MasterRefresh(refresh_user_id, pending_doc_ids[0]).main()
def do(self):
    """Scheduled-job entry point: run a full MasterRefresh for the first
    document that still has submissions in the TO_PROCESS state.

    NOTE(review): only the first pending document is processed per run --
    presumably the job is invoked repeatedly; confirm against the
    scheduler configuration.
    """
    # service account that owns automated refreshes (username redacted)
    user_id = User.objects.get(username='******').id
    # first document with unprocessed submissions
    document_id = SourceSubmission.objects.\
        filter(process_status='TO_PROCESS').\
        values_list('document_id',flat=True)[0]
    mr = MasterRefresh(user_id, document_id)
    mr.main()
def get_object_list(self, request):
    """Run a full MasterRefresh for the requested document, then upsert
    the per-document summary counts (submissions processed, doc
    datapoints, master datapoints).

    Returns a values() queryset of DocumentDetail rows for the document.
    """
    document_id = request.GET['document_id']

    mr = MasterRefresh(request.user.id, document_id)
    mr.main()

    # the three upserts below previously repeated the same
    # update_or_create boilerplate verbatim -- factored into a helper
    def upsert_detail(type_name, value):
        # one row per (document, detail-type); value refreshed each call
        DocumentDetail.objects.update_or_create(
            document_id=document_id,
            doc_detail_type_id=DocDetailType.objects.get(name=type_name).id,
            defaults={'doc_detail_value': value},
        )

    upsert_detail(
        'submission_processed_count',
        SourceSubmission.objects.filter(
            process_status='PROCESSED', document_id=document_id).count())

    upsert_detail(
        'doc_datapoint_count',
        DocDataPoint.objects.filter(document_id=document_id).count())

    upsert_detail(
        'datapoint_count',
        DataPoint.objects.filter(
            source_submission_id__in=SourceSubmission.objects.filter(
                document_id=document_id).values_list('id', flat=True)
        ).count())

    queryset = DocumentDetail.objects \
        .filter(document_id=document_id).values()
    return queryset
def test_submission_detail_refresh(self,):
    """refresh_submission_details should leave exactly one submission
    row per source submission of the document."""
    self.set_up()
    refresher = MasterRefresh(self.user.id, self.document.id)

    # = read_csv(self.test_file) ## fake the submission_data
    submission_ids_before = SourceSubmission.objects \
        .filter(document_id=self.document.id) \
        .values_list('id', flat=True)

    refresher.refresh_submission_details()

    submissions_after = SourceSubmission.objects \
        .filter(document_id=self.document.id)
    self.assertEqual(len(submission_ids_before), len(submissions_after))
def test_refresh_master_init(self):
    """MasterRefresh(user_id, document_id) should build an instance
    bound to the given document."""
    self.set_up()
    mr = MasterRefresh(self.user.id\
        ,self.document.id)

    # FIX: was `assertTrue(isinstance, (mr, MasterRefresh))`, which only
    # checks that the `isinstance` builtin is truthy -- always passes.
    self.assertTrue(isinstance(mr, MasterRefresh))
    self.assertEqual(self.document.id, mr.document_id)
def test_mapping(self):
    '''
    Ensure that after mapping all of the metadata ( region, campaign and
    indicator ) the refresh produces exactly one DataPoint row carrying
    the mapped master ids.
    '''
    self.set_up()
    mr = MasterRefresh(self.source_datapoints,self.user.id\
        ,self.document.id,self.indicator.id)

    ## create the source metadata ##
    create_source_meta_data(self.document.id)

    ## create mappings ( this is mimicking how bo would map metadata ) ##
    rm_1 = RegionMap.objects.create(
        mapped_by_id = self.user.id,
        source_id = SourceRegion.objects.get(region_code=self\
            .region_1_code).id,
        master_id = self.region_1.id)
    cm_1 = CampaignMap.objects.create(
        mapped_by_id = self.user.id,
        source_id = SourceCampaign.objects.get(campaign_string=\
            self.campaign_string).id,
        master_id = self.campaign.id)
    im_1 = IndicatorMap.objects.create(
        mapped_by_id = self.user.id,
        source_id = SourceIndicator.objects.get(indicator_string=\
            self.indicator_string).id,
        master_id = self.indicator.id)

    # with all three mappings in place, push source datapoints to master
    mr.source_dps_to_dps()

    # exactly one datapoint should carry the mapped master ids
    x = DataPoint.objects.filter(
        region_id = self.region_1.id,
        campaign_id = self.campaign.id,
        indicator_id = self.indicator.id,
    )
    self.assertEqual(len(x),1)
def get_object_list(self, request):
    """Sync doc datapoints and master datapoints for the requested
    document, rebuild the cache, then upsert and return the per-document
    summary counts.

    Returns a values() queryset of DocumentDetail rows for the document.
    """
    document_id = request.GET["document_id"]

    mr = MasterRefresh(request.user.id, document_id)
    mr.submissions_to_doc_datapoints()
    mr.sync_datapoint()

    cr = CacheRefresh()
    cr.main()

    ## upsert document meta from the last run ##
    ## fixme -> abstract this <- ##
    # FIX: status was misspelled "PROCEESED" (count was always 0) and the
    # count was not restricted to this document.
    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id=document_id,
        doc_detail_type_id=DocDetailType.objects.get(name="submission_processed_count").id,
        defaults={"doc_detail_value": SourceSubmission.objects.filter(
            process_status="PROCESSED", document_id=document_id).count()},
    )
    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id=document_id,
        doc_detail_type_id=DocDetailType.objects.get(name="doc_datapoint_count").id,
        defaults={"doc_detail_value": DocDataPoint.objects.filter(document_id=document_id).count()},
    )
    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id=document_id,
        doc_detail_type_id=DocDetailType.objects.get(name="datapoint_count").id,
        defaults={
            "doc_detail_value": DataPoint.objects.filter(
                source_submission_id__in=SourceSubmission.objects.filter(document_id=document_id).values_list(
                    "id", flat=True
                )
            ).count()
        },
    )

    queryset = DocumentDetail.objects.filter(document_id=document_id).values()
    return queryset
def get_object_list(self,request):
    """Sync doc datapoints and master datapoints for one document,
    rebuild the cache, then upsert and return the per-document summary
    counts (submissions processed, doc datapoints, master datapoints).
    """
    document_id = request.GET['document_id']

    mr = MasterRefresh(request.user.id, document_id)
    mr.submissions_to_doc_datapoints()
    mr.sync_datapoint()

    # rebuild aggregates so the UI reflects the fresh datapoints
    cr = CacheRefresh()
    cr.main()

    # how many of this document's submissions finished processing
    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id = document_id,
        doc_detail_type_id = DocDetailType.objects.get(name = 'submission_processed_count').id,
        defaults= {'doc_detail_value': SourceSubmission.objects\
            .filter(process_status = 'PROCESSED',\
                document_id = document_id).count()\
        },
    )
    # intermediate doc datapoints generated for this document
    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id = document_id,
        doc_detail_type_id = DocDetailType.objects.get(name = 'doc_datapoint_count').id,
        defaults= {'doc_detail_value': DocDataPoint.objects\
            .filter(document_id = document_id).count()\
        },
    )
    # master datapoints whose source submission belongs to this document
    doc_detail, created = DocumentDetail.objects.update_or_create(
        document_id = document_id,
        doc_detail_type_id = DocDetailType.objects.get(name = 'datapoint_count').id,
        defaults= {'doc_detail_value': DataPoint.objects\
            .filter(source_submission_id__in=SourceSubmission.objects\
                .filter(document_id = document_id).values_list('id',flat=True)).count()\
        },
    )

    queryset = DocumentDetail.objects\
        .filter(document_id=document_id).values()
    return queryset
def refresh_master_for_document(self, list_of_object_ids):
    """Instantiate a MasterRefresh for this document and return summary
    stats for it via one raw SQL round trip.

    Returns (None, qset) where each row carries the document columns
    plus doc_datapoint_cnt, source_submission_total_cnt and
    source_submission_to_process_cnt.

    NOTE(review): `list_of_object_ids` is unused and the MasterRefresh
    instance is never invoked here -- confirm whether constructing it
    has required side effects or whether both are leftovers.
    """
    mr = MasterRefresh(self.user_id,self.document_id)

    # the temp table pins the target document once, so the three count
    # subqueries stay cheap; the 1=1 joins simply attach each count to
    # the single document row.
    qset = Document.objects.raw('''
        DROP TABLE IF EXISTS _this_doc;
        CREATE TEMP TABLE _this_doc AS
        SELECT sd.* FROM source_data_document sd WHERE id = %s;

        SELECT td.*
            ,x.doc_datapoint_cnt
            ,y.source_submission_total_cnt
            ,z.source_submission_to_process_cnt
        FROM _this_doc td
        INNER JOIN (
            SELECT COUNT(1) as doc_datapoint_cnt
            FROM doc_datapoint dd
            WHERE EXISTS (
                SELECT 1 FROM _this_doc td
                WHERE dd.document_id = td.id
            )
        )x ON 1=1
        INNER JOIN (
            SELECT COUNT(1) as source_submission_total_cnt
            FROM source_submission ss
            WHERE EXISTS (
                SELECT 1 FROM _this_doc td
                WHERE ss.document_id = td.id
            )
        )y ON 1=1
        INNER JOIN (
            SELECT COUNT(1) as source_submission_to_process_cnt
            FROM source_submission ss
            WHERE EXISTS (
                SELECT 1 FROM _this_doc td
                WHERE ss.document_id = td.id
            )
            AND ss.process_status = 'TO_PROCESS'
        )z ON 1=1;
    ''',[self.document_id])

    return None, qset
def test_submission_detail_refresh(self, ):
    """After refreshing doc meta and submission details, the document
    should still have one submission row per source submission."""
    self.set_up()
    refresher = MasterRefresh(self.user.id, self.document.id)
    refresher.refresh_doc_meta()

    ## FIXME replace source_submission_data with read_csv(self.test_file)
    # = read_csv(self.test_file) ## fake the submission_data
    expected_ids = SourceSubmission.objects \
        .filter(document_id=self.document.id) \
        .values_list('id', flat=True)

    refresher.refresh_submission_details()

    detail_rows = SourceSubmission.objects \
        .filter(document_id=self.document.id)
    self.assertEqual(len(expected_ids), len(detail_rows))
def test_source_data_points_to_doc_datapoints(self):
    """process_doc_datapoints should yield a non-empty id collection
    for the prepared source submissions."""
    refresher = MasterRefresh(self.user.id, self.document.id)
    created_ids = refresher.process_doc_datapoints(self.source_submissions)
    self.assertTrue(created_ids)
def refresh_master(request,document_id):
    """Run the full master refresh for a document on behalf of the
    requesting user, then bounce to that document's review page."""
    MasterRefresh(request.user.id, document_id).main()
    overview_url = '/doc_review/overview/%s' % document_id
    return HttpResponseRedirect(overview_url)
def test_submission_to_datapoint(self):
    '''
    This simulates the following use case:

    As a user journey we can describe this test case as:
        - user uploads file ( see how set_up method calls DocTransform )
        - user maps metadata
        - user clicks " refresh master "
            -> user checks to see if data is correct
        - user realizes that the data is wrong, due to an invalid mapping
        - user re-maps the data and clicks " refresh master"
            -> data from old mapping should be deleted and
               associated to the newly mapped value

    TEST CASES:
        1. When the submission detail is refreshed, the location/campaign
           ids that we mapped should exist in that row.
        2. DocDataPoint records are created if the necessary mapping exists
        3. There are no zero or null values allowed in doc_datapoint
        4. The doc_datapoint from #3 is merged into datapoint.
        5. I create mappings, sync data, realize the mapping was incorrect,
           re-map the metadata and the old data should be deleted, the new
           data created.
            -> was the old data deleted?
            -> was the new data created?
    '''
    self.set_up()

    # first raw submission row of the document under test
    submission_qs = SourceSubmission.objects\
        .filter(document_id = self.document.id)\
        .values_list('id','submission_json')[0]
    ss_id, first_submission = submission_qs[0],json.loads(submission_qs[1])

    # source codes exactly as they appear in the uploaded file
    location_code = first_submission[self.location_code_input_column]
    campaign_code = first_submission[self.campaign_code_input_column]
    raw_indicator_list = [k for k,v in first_submission.iteritems()]
    indicator_code = raw_indicator_list[-1]

    ## SIMULATED USER MAPPING ##
    ## see: datapoints/source-data/Nigeria/2015/06/mapping/2

    ## choose meta data values for the source_map update ##
    map_location_id = Location.objects.all()[0].id
    map_campaign_id = Campaign.objects.all()[0].id
    first_indicator_id = Indicator.objects.all()[0].id

    ## map location ##
    som_id_l = SourceObjectMap.objects.get(
        content_type = 'location',
        source_object_code = location_code,
    )
    som_id_l.master_object_id = map_location_id
    som_id_l.save()

    ## map campaign ##
    som_id_c = SourceObjectMap.objects.get(
        content_type = 'campaign',
        source_object_code = campaign_code,
    )
    som_id_c.master_object_id = map_campaign_id
    som_id_c.save()

    ## map indicator ##
    som_id_i = SourceObjectMap.objects.get(
        content_type = 'indicator',
        source_object_code = indicator_code,
    )
    som_id_i.master_object_id = first_indicator_id
    som_id_i.save()

    mr_with_new_meta = MasterRefresh(self.user.id ,self.document.id)
    mr_with_new_meta.refresh_submission_details()
    first_submission_detail = SourceSubmission.objects\
        .get(id = ss_id)

    ## Test Case 2 ##
    self.assertEqual(first_submission_detail.location_id, map_location_id)
    self.assertEqual(first_submission_detail.campaign_id, map_campaign_id)

    ## now that we have created the mappign, "refresh_master" ##
    ## should create the relevant datapoints ##
    mr_with_new_meta.submissions_to_doc_datapoints()
    doc_dp_ids = DocDataPoint.objects.filter(document_id = self.document.id)

    ## Test Case #3
    self.assertEqual(1,len(doc_dp_ids))

    mr_with_new_meta.sync_datapoint()
    dps = DataPoint.objects.all()

    ## Test Case #4
    self.assertEqual(1,len(dps))

    ## Test Case #5 ##

    ## update the mapping with a new indicator value
    new_indicator_id = Indicator.objects.all()[1].id
    som_id_i.master_object_id = new_indicator_id
    som_id_i.save()

    mr_after_new_mapping = MasterRefresh(self.user.id ,self.document.id)
    mr_after_new_mapping.main()

    dp_with_new_indicator = DataPoint.objects.filter(indicator_id = \
        new_indicator_id)
    dp_with_old_indicator = DataPoint.objects.filter(indicator_id = \
        first_indicator_id)

    ## did new indicator flow through the system ?##
    self.assertEqual(1,len(dp_with_new_indicator))

    ## did the old indicator data get deleted?
    self.assertEqual(0,len(dp_with_old_indicator))
def test_submission_to_datapoint(self):
    '''
    This simulates a new document being processed with source_object_map
    records that have been successfully mapped to master_ids but from a
    different document.

    A few things this method checks:
        1. there is a record in doc_object_map even for mappings that
           existed before this document was ingested.
        2. when the submission detail is refreshed, the location/campaign
           ids that we mapped should exist in that row.
        3. DocDataPoint records are created if the necessary mapping exists
        4. There are no zero or null values allowed in doc_datapoint
        5. The doc_datapoint from #4 is merged into datapoint.
    '''
    self.set_up()
    mr = MasterRefresh(self.user.id, self.document.id)

    # first raw submission row of the document under test
    submission_qs = SourceSubmission.objects\
        .filter(document_id = self.document.id)\
        .values_list('id','submission_json')[0]
    ss_id, first_submission = submission_qs[0], json.loads(
        submission_qs[1])

    # source codes exactly as they appear in the uploaded file
    location_code = first_submission[self.location_code_input_column]
    campaign_code = first_submission[self.campaign_code_input_column]
    raw_indicator_list = [k for k, v in first_submission.iteritems()]
    indicator_code = raw_indicator_list[-1]

    # pre-existing mappings, as if created while ingesting another doc
    map_location_id = Location.objects.all()[0].id
    som_id_r = SourceObjectMap.objects.create(
        content_type='location',
        source_object_code=location_code,
        master_object_id=map_location_id,
        mapped_by_id=self.user.id)
    map_campaign_id = Campaign.objects.all()[0].id
    som_id_c = SourceObjectMap.objects.create(
        content_type='campaign',
        source_object_code=campaign_code,
        master_object_id=map_campaign_id,
        mapped_by_id=self.user.id)
    map_indicator_id = Indicator.objects.all()[0].id
    som_id_i = SourceObjectMap.objects.create(
        content_type='indicator',
        source_object_code=indicator_code,
        master_object_id=map_indicator_id,
        mapped_by_id=self.user.id)

    mr.refresh_doc_meta()

    # mapping rows tied to this document for the location code
    doc_som_id_for_location_code = DocumentSourceObjectMap.objects\
        .filter(id__in= SourceObjectMap.objects.filter(\
            content_type = 'location',
            source_object_code = location_code,
        ).values_list('id',flat=True)
        ).values_list('id',flat=True)

    ## Test Case 1 ##
    self.assertEqual(len(doc_som_id_for_location_code), 1) # 1

    mr_with_new_meta = MasterRefresh(self.user.id, self.document.id)
    mr_with_new_meta.refresh_submission_details()
    first_submission_detail = SourceSubmission.objects\
        .get(id = ss_id)

    ## Test Case 2 ##
    self.assertEqual(first_submission_detail.location_id, map_location_id)
    self.assertEqual(first_submission_detail.campaign_id, map_campaign_id)

    mr_with_new_meta.submissions_to_doc_datapoints()
    doc_dp_ids = DocDataPoint.objects.filter(document_id=self.document.id)

    ## Test Case #3
    self.assertEqual(1, len(doc_dp_ids))

    mr_with_new_meta.sync_datapoint()
    dps = DataPoint.objects.all()

    ## Test Case #4
    self.assertEqual(1, len(dps))
def refresh_master(request, document_id):
    """Run the full MasterRefresh pipeline for *document_id* on behalf
    of the requesting user, then redirect to the doc review overview.
    """
    mr = MasterRefresh(request.user.id, document_id)
    mr.main()
    return HttpResponseRedirect('/doc_review/overview/%s' % document_id)
def test_submission_to_datapoint(self):
    '''
    This simulates a new document being processed with source_object_map
    records that have been successfully mapped to master_ids but from a
    different document.

    A few things this method checks:
        1. there is a record in doc_object_map even for mappings that
           existed before this document was ingested.
        2. when the submission detail is refreshed, the location/campaign
           ids that we mapped should exist in that row.
        3. DocDataPoint records are created if the necessary mapping exists
        4. There are no zero or null values allowed in doc_datapoint
        5. The doc_datapoint from #4 is merged into datapoint.
    '''
    self.set_up()
    mr = MasterRefresh(self.user.id ,self.document.id)

    # first raw submission row of the document under test
    submission_qs = SourceSubmission.objects\
        .filter(document_id = self.document.id)\
        .values_list('id','submission_json')[0]
    ss_id, first_submission = submission_qs[0],json.loads(submission_qs[1])

    # source codes exactly as they appear in the uploaded file
    location_code = first_submission[self.location_code_input_column]
    campaign_code = first_submission[self.campaign_code_input_column]
    raw_indicator_list = [k for k,v in first_submission.iteritems()]
    indicator_code = raw_indicator_list[-1]

    # pre-existing mappings, as if created while ingesting another doc
    map_location_id = Location.objects.all()[0].id
    som_id_r = SourceObjectMap.objects.create(
        content_type = 'location',
        source_object_code = location_code,
        master_object_id = map_location_id,
        mapped_by_id = self.user.id
    )
    map_campaign_id = Campaign.objects.all()[0].id
    som_id_c = SourceObjectMap.objects.create(
        content_type = 'campaign',
        source_object_code = campaign_code,
        master_object_id = map_campaign_id,
        mapped_by_id = self.user.id
    )
    map_indicator_id = Indicator.objects.all()[0].id
    som_id_i = SourceObjectMap.objects.create(
        content_type = 'indicator',
        source_object_code = indicator_code,
        master_object_id = map_indicator_id,
        mapped_by_id = self.user.id
    )

    mr.refresh_doc_meta()

    # mapping rows tied to this document for the location code
    doc_som_id_for_location_code = DocumentSourceObjectMap.objects\
        .filter(id__in= SourceObjectMap.objects.filter(\
            content_type = 'location',
            source_object_code = location_code,
        ).values_list('id',flat=True)
        ).values_list('id',flat=True)

    ## Test Case 1 ##
    self.assertEqual(len(doc_som_id_for_location_code),1) # 1

    mr = MasterRefresh(self.user.id ,self.document.id)

    # NOTE(review): the detail row is queried without an explicit
    # refresh_submission_details() call here -- presumably
    # refresh_doc_meta() above triggers it; confirm in MasterRefresh.
    first_submission_detail = SourceSubmissionDetail.objects\
        .get(source_submission_id = ss_id)

    ## Test Case 2 ##
    self.assertEqual(first_submission_detail.location_id, map_location_id)
    self.assertEqual(first_submission_detail.campaign_id, map_campaign_id)

    mr.submissions_to_doc_datapoints()
    doc_dp_ids = DocDataPoint.objects.filter(document_id = self.document.id)

    ## Test Case #3
    self.assertEqual(1,len(doc_dp_ids))

    mr.sync_datapoint()
    dps = DataPoint.objects.all()

    ## Test Case #4
    # FIX: previously re-asserted len(doc_dp_ids) here, so the merge of
    # doc_datapoint into datapoint was never actually verified.
    self.assertEqual(1,len(dps))