def process_source_sheet(source_sheet_df, sheet_name):

    user_id = -1

    # file_loc = settings.MEDIA_ROOT + sheet_name
    saved_csv_file_location = settings.MEDIA_ROOT + sheet_name + '.csv'
    source_sheet_df.to_csv(saved_csv_file_location)
    doc_file_text = sheet_name + '.csv'

    new_doc = Document.objects.create(
        doc_title = doc_file_text,
        guid = 'test',
        docfile=doc_file_text
    )

    create_doc_details(new_doc.id)

    ## document -> source_submissions ##
    dt = DocTransform(user_id, new_doc.id)
    dt.main()

    ## source_submissions -> datapoints ##
    mr = MasterRefresh(user_id, new_doc.id)
    mr.main()

    ## datapoints -> computed datapoints ##
    ar = AggRefresh()
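
A minimal, hedged sketch tying the three stages above together end to end. The per-campaign AggRefresh loop follows the get_object_list examples later in this listing (it is not taken from process_source_sheet), run_refresh_pipeline is a hypothetical wrapper name, and imports are left out as in the surrounding snippets:

def run_refresh_pipeline(user_id, document_id):
    # Sketch only: chains the stages shown throughout these examples.

    ## document -> source_submissions ##
    DocTransform(user_id, document_id).main()

    ## source_submissions -> datapoints ##
    MasterRefresh(user_id, document_id).main()

    ## datapoints -> computed datapoints, one AggRefresh per campaign ##
    campaign_ids = set(DataPoint.objects
        .filter(source_submission__document_id=document_id)
        .values_list('campaign_id', flat=True))
    for campaign_id in campaign_ids:
        AggRefresh(campaign_id).main()
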
Example #2
    def do(self):
        user_id = User.objects.get(username = '******').id
        document_id = SourceSubmission.objects.\
            filter(process_status='TO_PROCESS').\
            values_list('document_id',flat=True)[0]

        mr = MasterRefresh(user_id, document_id)
        mr.main()
Example #3
    def do(self):
        user_id = User.objects.get(username='******').id
        document_id = SourceSubmission.objects.\
            filter(process_status='TO_PROCESS').\
            values_list('document_id',flat=True)[0]

        mr = MasterRefresh(user_id, document_id)
        mr.main()
Example #4
    def test_duplicate_rows(self):
        doc_id = self.ingest_file('dupe_datapoints.csv')
        dt = ComplexDocTransform(self.user.id, doc_id)
        source_submissions = dt.main()
        mr = MasterRefresh(self.user.id, doc_id)
        mr.main()
        dps = DataPoint.objects.all()
        self.assertEqual(len(dps), 1)
        some_cell_value_from_the_file = 0.9029
        self.assertEqual(dps[0].value, some_cell_value_from_the_file)
Example #5
    def get_object_list(self, request):
        '''
        ## When you upload a file, step one is getting data into source_submission
            ## --> DocTransform <-- ##

        ## Step two is translating from source_submission into datapoints
            ## --> MasterRefresh <-- ##

        ## Step three is aggregation
            ## --> AggRefresh <-- ##
        '''
        try:
            doc_id = request.GET['document_id']
        except KeyError:
            raise DatapointsException(
                message='Document_id is a required API param')
        # dt = DocTransform(request.user.id, doc_id)

        ran_complex_doc_transform = False

        try:
            dt = ComplexDocTransform(request.user.id, doc_id)
            dt.main()
            ran_complex_doc_transform = True
        except Exception as err:
            try:
                dt = DateDocTransform(request.user.id, doc_id)
                ssids = dt.process_file()

            except Exception as err:
                raise DatapointsException(message=err.message)

        mr = MasterRefresh(request.user.id, doc_id)
        mr.main()

        if ran_complex_doc_transform:
            doc_campaign_ids = set(list(DataPoint.objects\
                .filter(source_submission__document_id = doc_id)\
                .values_list('campaign_id',flat=True)))

            for c_id in doc_campaign_ids:
                ar = AggRefresh(c_id)
                # try/except block hack because tests fail otherwise
                try:
                    with transaction.atomic():
                        ar.main()
                except TransactionManagementError as e:
                    pass
        return Document.objects.filter(id=doc_id).values()
Example #6
    def get_object_list(self, request):
        '''
        ## When you upload a file, step one is getting data into source_submission
            ## --> DocTransform <-- ##

        ## Step two is translating from source_submission into datapoints
            ## --> MasterRefresh <-- ##

        ## Step three is aggregation
            ## --> AggRefresh <-- ##
        '''
        try:
            doc_id = request.GET['document_id']
        except KeyError:
            raise DatapointsException(message='Document_id is a required API param')
        # dt = DocTransform(request.user.id, doc_id)

        ran_complex_doc_transform = False

        try:
            dt = ComplexDocTransform(request.user.id, doc_id)
            dt.main()
            ran_complex_doc_transform = True
        except Exception as err:
            try:
                dt = DateDocTransform(request.user.id, doc_id)
                ssids = dt.process_file()

            except Exception as err:
                raise DatapointsException(message=err.message)
        
        mr = MasterRefresh(request.user.id, doc_id)
        mr.main()

        if ran_complex_doc_transform:
            doc_campaign_ids = set(list(DataPoint.objects\
                .filter(source_submission__document_id = doc_id)\
                .values_list('campaign_id',flat=True)))

            for c_id in doc_campaign_ids:
                ar = AggRefresh(c_id)
                # try/except block hack because tests fail otherwise
                try:
                    with transaction.atomic():
                        ar.main()
                except TransactionManagementError as e:
                    pass
        return Document.objects.filter(id=doc_id).values()
Example #7
    def test_submission_detail_refresh(self,):

        self.set_up()
        mr = MasterRefresh(self.user.id ,self.document.id)

        source_submissions_data = SourceSubmission.objects\
            .filter(document_id = self.document.id)\
            .values_list('id',flat=True)

        mr.refresh_submission_details()
        submission_details = SourceSubmission.objects\
            .filter(document_id = self.document.id)

        self.assertEqual(
            len(source_submissions_data), len(submission_details))
Example #8
    def test_submission_detail_refresh(self,):

        self.set_up()
        mr = MasterRefresh(self.user.id ,self.document.id)

        source_submissions_data = SourceSubmission.objects\
            .filter(document_id = self.document.id)\
            .values_list('id',flat=True)

        mr.refresh_submission_details()
        submission_details = SourceSubmission.objects\
            .filter(document_id = self.document.id)

        self.assertEqual(
            len(source_submissions_data), len(submission_details))
Example #9
    def test_class_indicator_mapping(self):

        lqas_indicator = Indicator.objects.create(
            name = 'LQAS',
            short_name = 'LQAS',
            data_format = 'class'
        )

        mapping_1 = IndicatorClassMap.objects.create(
            indicator = lqas_indicator,
            string_value = "High Pass",
            enum_value = 4,
            is_display = True
        )

        mapping_2 = IndicatorClassMap.objects.create(
            indicator = lqas_indicator,
            string_value = "HP",
            enum_value = 4,
            is_display = False
        )

        lqas_som = SourceObjectMap.objects.create(
            source_object_code = 'LQAS',
            content_type = 'indicator',
            mapped_by_id = self.user_id,
            master_object_id = lqas_indicator.id
        )

        doc_id = self.ingest_file('class_indicator.csv')
        ComplexDocTransform(self.user.id, doc_id).main()
        MasterRefresh(self.user.id, doc_id).main()
        dps = DataPoint.objects.all()
        self.assertEqual(len(dps), 1)
        self.assertEqual(dps[0].value, 4)
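
The two IndicatorClassMap rows above are what let a text cell such as "High Pass" land in DataPoint.value as the number 4. Below is a hedged sketch of that string-to-enum lookup; class_value_to_enum is a hypothetical helper for illustration, not the actual MasterRefresh code path for data_format='class' indicators:

def class_value_to_enum(indicator_id, cell_value):
    # Sketch only: resolve a class-type cell value to its numeric enum_value.
    try:
        class_map = IndicatorClassMap.objects.get(
            indicator_id=indicator_id,
            string_value=cell_value,
        )
        return class_map.enum_value
    except IndicatorClassMap.DoesNotExist:
        # an unmapped class string has no numeric representation
        return None
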
Example #10
    def get_object_list(self, request):
        try:
            doc_id = request.GET['document_id']
        except KeyError:
            raise DatapointsException(message='Document_id is a required API param')
        # dt = DocTransform(request.user.id, doc_id)

        mr = MasterRefresh(request.user.id, doc_id)
        mr.main()

        doc_campaign_ids = set(list(DataPoint.objects\
            .filter(source_submission__document_id = doc_id)\
            .values_list('campaign_id',flat=True)))
        for c_id in doc_campaign_ids:
            ar = AggRefresh(c_id)

        return Document.objects.filter(id=doc_id).values()
Example #11
    def test_percent_vals(self):
        doc_id = self.ingest_file('percent_vals.csv')
        ComplexDocTransform(self.user.id, doc_id).main()
        MasterRefresh(self.user.id, doc_id).main()
        dps = DataPoint.objects.all()
        self.assertEqual(len(dps), 2)
        expected_dp_val = 0.8267
        dp = DataPoint.objects.get(indicator_id = self.mapped_indicator_with_data)
        self.assertEqual(expected_dp_val, dp.value)
Example #12
    def get_object_list(self, request):
        try:
            doc_id = request.GET['document_id']
        except KeyError:
            raise DatapointsException(
                message='Document_id is a required API param')
        # dt = DocTransform(request.user.id, doc_id)

        mr = MasterRefresh(request.user.id, doc_id)
        mr.main()

        doc_campaign_ids = set(list(DataPoint.objects\
            .filter(source_submission__document_id = doc_id)\
            .values_list('campaign_id',flat=True)))
        for c_id in doc_campaign_ids:
            ar = AggRefresh(c_id)

        return Document.objects.filter(id=doc_id).values()
Example #13
    def get_object_list(self, request):
        document_id = request.GET['document_id']

        mr = MasterRefresh(request.user.id, document_id)
        mr.main()

        ar = AggRefresh()

        doc_detail, created = DocumentDetail.objects.update_or_create(
            document_id=document_id,
            doc_detail_type_id=DocDetailType.objects.get(name='submission_processed_count').id,
            defaults={
                'doc_detail_value': SourceSubmission.objects.filter(
                    process_status='PROCESSED',
                    document_id=document_id).count()
            },
        )

        doc_detail, created = DocumentDetail.objects.update_or_create(
            document_id=document_id,
            doc_detail_type_id=DocDetailType.objects.get(name='doc_datapoint_count').id,
            defaults={
                'doc_detail_value': DocDataPoint.objects.filter(document_id=document_id).count()
            },
        )

        doc_detail, created = DocumentDetail.objects.update_or_create(
            document_id=document_id,
            doc_detail_type_id=DocDetailType.objects.get(name='datapoint_count').id,
            defaults={
                'doc_detail_value': DataPoint.objects.filter(
                    source_submission_id__in=SourceSubmission.objects.filter(
                        document_id=document_id).values_list('id', flat=True)).count()
            },
        )

        queryset = DocumentDetail.objects \
            .filter(document_id=document_id) \
            .values('id', 'doc_detail_type_id', 'doc_detail_type__name',
                    'document_id', 'doc_detail_value')

        return queryset
Example #14
    def test_latest_data_gets_synced(self):
        '''
        I upload a spreadsheet on Tuesday, but I realize that the data was wrong,
        so I upload another sheet with the same locations, dates and indicators.
        The new spreadsheet should override, and not aggregate, any duplicative data.
        '''

        self.set_up()

        test_ind_id = Indicator.objects.all()[0].id
        test_loc_id = Location.objects.all()[0].id
        bad_val, good_val = 10, 20
        data_date = '2015-12-31'
        ss_old = SourceSubmission.objects\
            .filter(document_id = self.document.id)[0]


        doc_to_override = Document.objects.create(
                doc_title = 'override',
                created_by_id = self.user.id,
                guid = 'override'
        )

        ss_new = SourceSubmission.objects.create(
            document_id = doc_to_override.id,
            instance_guid = 'override',
            row_number = 1,
            data_date = '2016-01-01',
            location_code = 'OVERRIDE',
            location_display = 'OVERRIDE',
            submission_json = '',
            process_status = 1
        )

        base_doc_dp_dict = {
            'document_id' : self.document.id,
            'indicator_id' : test_ind_id,
            'location_id' : test_loc_id,
            'data_date' : data_date,
            'agg_on_location': True,
        }

        bad_doc_dp_dict = {
            'value' : bad_val,
            'data_date' : data_date,
            'source_submission_id' : ss_old.id,
        }
        bad_doc_dp_dict.update(base_doc_dp_dict)

        good_doc_dp_dict = {
            'value' : good_val,
            'data_date' : data_date,
            'source_submission_id' : ss_new.id,
        }
        good_doc_dp_dict.update(base_doc_dp_dict)

        DocDataPoint.objects.create(**good_doc_dp_dict)
        DocDataPoint.objects.create(**bad_doc_dp_dict)

        mr = MasterRefresh(self.user.id, self.document.id)
        mr.sync_datapoint([ss_old.id, ss_new.id])

        dp_result = DataPoint.objects.filter(
            location_id = test_loc_id,
            indicator_id = test_ind_id,
            data_date = data_date
        )

        self.assertEqual(1,len(dp_result))
        self.assertEqual(good_val, dp_result[0].value)
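
The docstring above describes a "latest upload wins" rule: when two DocDataPoint rows share the same location, indicator and data date, only the one from the newer source submission should survive in datapoint. Below is a rough sketch of that rule; pick_latest_doc_datapoints is a hypothetical helper, and the assumption that a higher source_submission_id means a newer upload is an illustration, not something stated in the source:

def pick_latest_doc_datapoints(source_submission_ids):
    # Sketch only: approximates the override rule exercised by the test above,
    # not the actual MasterRefresh.sync_datapoint implementation.
    latest = {}
    doc_dps = DocDataPoint.objects.filter(
        source_submission_id__in=source_submission_ids,
    ).order_by('source_submission_id')  # assumes higher id == newer upload
    for doc_dp in doc_dps:
        # a later submission overwrites an earlier one for the same key
        key = (doc_dp.location_id, doc_dp.indicator_id, doc_dp.data_date)
        latest[key] = doc_dp
    return list(latest.values())
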
Example #15
    def test_latest_data_gets_synced(self):
        '''
        I upload a spreadsheet on Tuesday, but I realize that the data was wrong,
        so I upload another sheet with the same locations, dates and indicators.
        The new spreadsheet should override, and not aggregate, any duplicative data.
        '''

        self.set_up()

        test_ind_id = Indicator.objects.all()[0].id
        test_loc_id = Location.objects.all()[0].id
        bad_val, good_val = 10, 20
        data_date = '2015-12-31'
        ss_old = SourceSubmission.objects\
            .filter(document_id = self.document.id)[0]


        doc_to_override = Document.objects.create(
                doc_title = 'override',
                created_by_id = self.user.id,
                guid = 'override'
        )

        ss_new = SourceSubmission.objects.create(
            document_id = doc_to_override.id,
            instance_guid = 'override',
            row_number = 1,
            data_date = '2016-01-01',
            location_code = 'OVERRIDE',
            location_display = 'OVERRIDE',
            submission_json = '',
            process_status = 1
        )

        base_doc_dp_dict = {
            'document_id' : self.document.id,
            'indicator_id' : test_ind_id,
            'location_id' : test_loc_id,
            'data_date' : data_date,
            'agg_on_location': True,
        }

        bad_doc_dp_dict = {
            'value' : bad_val,
            'data_date' : data_date,
            'source_submission_id' : ss_old.id,
        }
        bad_doc_dp_dict.update(base_doc_dp_dict)

        good_doc_dp_dict = {
            'value' : good_val,
            'data_date' : data_date,
            'source_submission_id' : ss_new.id,
        }
        good_doc_dp_dict.update(base_doc_dp_dict)

        DocDataPoint.objects.create(**good_doc_dp_dict)
        DocDataPoint.objects.create(**bad_doc_dp_dict)

        mr = MasterRefresh(self.user.id, self.document.id)
        mr.sync_datapoint([ss_old.id, ss_new.id])

        dp_result = DataPoint.objects.filter(
            location_id = test_loc_id,
            indicator_id = test_ind_id,
            data_date = data_date
        )

        self.assertEqual(1,len(dp_result))
        self.assertEqual(good_val, dp_result[0].value)
Example #16
    def test_submission_to_datapoint(self):
        '''
        This simulates the following use case:

        As a user journey we can describe this test case as:
            - user uploads file ( see how set_up method calls ComplexDocTransform )
            - user maps metadata
            - user clicks " refresh master "
                -> user checks to see if data is correct
            - user realizes that the data is wrong, due to an invalid mapping
            - user re-maps the data and clicks " refresh master "
                -> data from old mapping should be deleted and associated to
                   the newly mapped value

        TEST CASES:
            1. When the submission detail is refreshed, the location/campaign ids
               that we mapped should exist in that row.
            2. DocDataPoint records are created if the necessary mapping exists.
            3. There are no zero or null values allowed in doc_datapoint.
            4. The doc_datapoint from #3 is merged into datapoint.
            5. I create mappings, sync data, realize the mapping was incorrect,
               re-map the metadata and the old data should be deleted, the new
               data created.
                 -> was the old data deleted?
                 -> was the new data created?
        '''

        self.set_up()

        submission_qs = SourceSubmission.objects\
            .filter(document_id = self.document.id)\
            .values_list('id','submission_json')[0]

        ss_id, first_submission = submission_qs[0],json.loads(submission_qs[1])

        location_code = first_submission[self.location_code_input_column]
        data_date = first_submission[self.data_date_input_column]
        raw_indicator_list = [k for k,v in first_submission.iteritems()]

        indicator_code = raw_indicator_list[-1]

        ## SIMULATED USER MAPPING ##
        ## see: source-data/Nigeria/2015/06/mapping/2

        ## choose metadata values for the source_map update ##
        map_location_id = Location.objects.all()[0].id
        first_indicator_id = Indicator.objects.all()[0].id

        ## map location ##
        som_id_l = SourceObjectMap.objects.get(
            content_type = 'location',
            source_object_code = location_code,
        )
        som_id_l.master_object_id = map_location_id
        som_id_l.save()

        ## map indicator ##
        som_id_i = SourceObjectMap.objects.get(
            content_type = 'indicator',
            source_object_code = indicator_code,
        )
        som_id_i.master_object_id = first_indicator_id
        som_id_i.save()

        mr_with_new_meta = MasterRefresh(self.user.id ,self.document.id)
        mr_with_new_meta.refresh_submission_details()

        first_submission_detail = SourceSubmission.objects\
            .get(id = ss_id)

        ## Test Case 2 ##
        self.assertEqual(first_submission_detail.get_location_id(), map_location_id)

        ## now that we have created the mapping, "refresh_master" ##
        ##         should create the relevant datapoints          ##

        mr_with_new_meta.submissions_to_doc_datapoints()
        doc_dp_ids = DocDataPoint.objects.filter(document_id =
            self.document.id,indicator_id=first_indicator_id)

        ## Test Case #3
        self.assertEqual(1,len(doc_dp_ids))

        mr_with_new_meta.sync_datapoint()
        dps = DataPoint.objects.all()

        ## Test Case #4
        self.assertEqual(1,len(dps))

        ## Test Case #5

        ## update the mapping with a new indicator value ##
        new_indicator_id = Indicator.objects.all()[1].id
        som_id_i.master_object_id = new_indicator_id
        som_id_i.save()

        mr_after_new_mapping = MasterRefresh(self.user.id ,self.document.id)
        mr_after_new_mapping.main()

        dp_with_new_indicator = DataPoint.objects.filter(indicator_id = \
            new_indicator_id)

        dp_with_old_indicator = DataPoint.objects.filter(indicator_id = \
            first_indicator_id)

        ## did the new indicator flow through the system? ##
        self.assertEqual(1,len(dp_with_new_indicator))

        ## did the old indicator data get deleted?
        self.assertEqual(0,len(dp_with_old_indicator))
Example #17
    def test_refresh_master_init(self):

        self.set_up()
        mr = MasterRefresh(self.user.id ,self.document.id)

        self.assertTrue(isinstance(mr, MasterRefresh))
Example #18
    def test_data_that_needs_campaign(self):
        '''
        After syncing a document, the system needs to ensure that data that
        has no associated campaign will be marked with a cache_job_id of -2.

        The aggregation framework searches for datapoints with cache_job_id = -1,
        but if one of these datapoints has no associated campaign, then
        aggregation will break.

        Thus we must ensure that any data that needs a campaign is marked
        accordingly, so we don't break aggregation, and so the user knows that
        there is data in the recent upload that needs to have an associated
        campaign.

        python manage.py test rhizome.tests.test_refresh_master\
        .RefreshMasterTestCase.test_data_that_needs_campaign \
        --settings=rhizome.settings.test
        '''

        self.set_up()


        office_obj = Office.objects.all()[0]
        tag_obj = IndicatorTag.objects.all()[0]
        location_obj = Location.objects.all()[0]
        indicator_obj = Indicator.objects.all()[0]
        campaign_type_obj = CampaignType.objects.all()[0]
        start_date, end_date, bad_date = '2016-12-01','2016-12-30','2017-01-01'
        source_submission_obj = SourceSubmission.objects.all()[0]

        my_campaign = Campaign.objects.create(
            start_date = start_date,
            end_date = end_date,
            name = 'my_campaign',
            top_lvl_location = location_obj,
            top_lvl_indicator_tag = tag_obj,
            office = office_obj,
            campaign_type = campaign_type_obj
        )

        ## skipping over all of the refresh_master methods
        ## and just testing this particular test case
        base_dp_dict = {
            'location_id': location_obj.id,
            'indicator_id': indicator_obj.id,
            'value': 100,
            'source_submission_id': source_submission_obj.id,
        }

        ## create dictionaries for the two datapoints ##
        no_campaign_dp_dict = {'data_date' : bad_date}
        has_campaign_dp_dict = {'data_date' : start_date}

        no_campaign_dp_dict.update(base_dp_dict)
        has_campaign_dp_dict.update(base_dp_dict)

        ## create the datapoints ##
        dp_without_campaign = DataPoint.objects.create(**no_campaign_dp_dict)
        dp_with_campaign = DataPoint.objects.create(**has_campaign_dp_dict)

        ## instantiate the MasterRefresh object ##
        mr = MasterRefresh(self.user.id, self.document.id)
        mr.ss_ids_to_process = [source_submission_obj]

        ## this is the method we are testing ##
        mr.mark_datapoints_with_needs_campaign()

        ## now fetch the data after updating the cache_job_ids ##
        no_campaign_dp_dict.pop('cache_job_id', None)
        no_campaign = DataPoint.objects.get(**no_campaign_dp_dict)
        has_campaign = DataPoint.objects.get(**has_campaign_dp_dict)

        self.assertEqual(has_campaign.cache_job_id,-1)
        self.assertEqual(no_campaign.cache_job_id,-2)
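
Per the docstring, freshly synced datapoints carry cache_job_id = -1 and anything without a covering campaign must be bumped to -2 before aggregation runs. Below is a hedged sketch of that marking step; mark_needs_campaign_sketch is a hypothetical stand-in, and the date-range check against Campaign.start_date/end_date mirrors how this test builds its campaign rather than the real mark_datapoints_with_needs_campaign internals:

def mark_needs_campaign_sketch(source_submission_ids):
    # Sketch only: flag datapoints whose data_date falls outside every campaign.
    pending_dps = DataPoint.objects.filter(
        source_submission_id__in=source_submission_ids,
        cache_job_id=-1,  # -1 marks data waiting to be aggregated
    )
    for dp in pending_dps:
        covered = Campaign.objects.filter(
            start_date__lte=dp.data_date,
            end_date__gte=dp.data_date,
        ).exists()
        if not covered:
            # no campaign covers this data_date, so aggregation must skip it
            dp.cache_job_id = -2
            dp.save()
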
Example #19
    def test_data_that_needs_campaign(self):
        '''
        After syncing a document, the system needs to ensure that data that
        has no associated campaign will be marked with a cache_job_id of -2.

        The aggregation framework searches for datapoints with cache_job_id = -1,
        but if one of these datapoints has no associated campaign, then
        aggregation will break.

        Thus we must ensure that any data that needs a campaign is marked
        accordingly, so we don't break aggregation, and so the user knows that
        there is data in the recent upload that needs to have an associated
        campaign.

        python manage.py test rhizome.tests.test_refresh_master\
        .RefreshMasterTestCase.test_data_that_needs_campaign \
        --settings=rhizome.settings.test
        '''

        self.set_up()


        office_obj = Office.objects.all()[0]
        tag_obj = IndicatorTag.objects.all()[0]
        location_obj = Location.objects.all()[0]
        indicator_obj = Indicator.objects.all()[0]
        campaign_type_obj = CampaignType.objects.all()[0]
        start_date, end_date, bad_date = '2016-12-01','2016-12-30','2017-01-01'
        source_submission_obj = SourceSubmission.objects.all()[0]

        my_campaign = Campaign.objects.create(
            start_date = start_date,
            end_date = end_date,
            name = 'my_campaign',
            top_lvl_location = location_obj,
            top_lvl_indicator_tag = tag_obj,
            office = office_obj,
            campaign_type = campaign_type_obj
        )

        ## skipping over all of the refresh_master methods
        ## and just testing this particular test case
        base_dp_dict = {
            'location_id': location_obj.id,
            'indicator_id': indicator_obj.id,
            'value': 100,
            'source_submission_id': source_submission_obj.id,
        }

        ## create dictionaries for the two datapoints ##
        no_campaign_dp_dict = {'data_date' : bad_date}
        has_campaign_dp_dict = {'data_date' : start_date}

        no_campaign_dp_dict.update(base_dp_dict)
        has_campaign_dp_dict.update(base_dp_dict)

        ## create the datapoints ##
        dp_without_campaign = DataPoint.objects.create(**no_campaign_dp_dict)
        dp_with_campaign = DataPoint.objects.create(**has_campaign_dp_dict)

        ## instantiate the MasterRefresh object ##
        mr = MasterRefresh(self.user.id, self.document.id)
        mr.ss_ids_to_process = [source_submission_obj]

        ## this is the method we are testing ##
        mr.mark_datapoints_with_needs_campaign()

        ## now fetch the data after updating the cache_job_ids ##
        no_campaign_dp_dict.pop('cache_job_id', None)
        no_campaign = DataPoint.objects.get(**no_campaign_dp_dict)
        has_campaign = DataPoint.objects.get(**has_campaign_dp_dict)

        self.assertEqual(has_campaign.cache_job_id,-1)
        self.assertEqual(no_campaign.cache_job_id,-2)
Example #20
    def test_submission_to_datapoint(self):
        '''
        This simulates the following use case:

        As a user journey we can describe this test case as:
            - user uploads file ( see how set_up method calls DocTransform )
            - user maps metadata
            - user clicks " refresh master "
                -> user checks to see if data is correct
            - user realizes that the data is wrong, due to an invalid mapping
            - user re-maps the data and clicks " refresh master "
                -> data from old mapping should be deleted and associated to
                   the newly mapped value

        TEST CASES:
            1. When the submission detail is refreshed, the location/campaign ids
               that we mapped should exist in that row.
            2. DocDataPoint records are created if the necessary mapping exists.
            3. There are no zero or null values allowed in doc_datapoint.
            4. The doc_datapoint from #3 is merged into datapoint.
            5. I create mappings, sync data, realize the mapping was incorrect,
               re-map the metadata and the old data should be deleted, the new
               data created.
                 -> was the old data deleted?
                 -> was the new data created?
        '''

        self.set_up()

        submission_qs = SourceSubmission.objects\
            .filter(document_id = self.document.id)\
            .values_list('id','submission_json')[0]

        ss_id, first_submission = submission_qs[0],json.loads(submission_qs[1])

        location_code = first_submission[self.location_code_input_column]
        data_date = first_submission[self.data_date_input_column]
        raw_indicator_list = [k for k,v in first_submission.iteritems()]

        indicator_code = raw_indicator_list[-1]

        ## SIMULATED USER MAPPING ##
        ## see: source-data/Nigeria/2015/06/mapping/2

        ## choose metadata values for the source_map update ##
        map_location_id = Location.objects.all()[0].id
        first_indicator_id = Indicator.objects.all()[0].id

        ## map location ##
        som_id_l = SourceObjectMap.objects.get(
            content_type = 'location',
            source_object_code = location_code,
        )
        som_id_l.master_object_id = map_location_id
        som_id_l.save()

        ## map indicator ##
        som_id_i = SourceObjectMap.objects.get(
            content_type = 'indicator',
            source_object_code = indicator_code,
        )
        som_id_i.master_object_id = first_indicator_id
        som_id_i.save()

        mr_with_new_meta = MasterRefresh(self.user.id ,self.document.id)
        mr_with_new_meta.refresh_submission_details()

        first_submission_detail = SourceSubmission.objects\
            .get(id = ss_id)

        ## Test Case 2 ##
        self.assertEqual(first_submission_detail.get_location_id(), map_location_id)

        ## now that we have created the mapping, "refresh_master" ##
        ##         should create the relevant datapoints          ##

        mr_with_new_meta.submissions_to_doc_datapoints()
        doc_dp_ids = DocDataPoint.objects.filter(document_id =
            self.document.id,indicator_id=first_indicator_id)

        ## Test Case #3
        self.assertEqual(1,len(doc_dp_ids))

        mr_with_new_meta.sync_datapoint()
        dps = DataPoint.objects.all()

        ## Test Case #4
        self.assertEqual(1,len(dps))

        ## Test Case #5

        ## update the mapping with a new indicator value ##
        new_indicator_id = Indicator.objects.all()[1].id
        som_id_i.master_object_id = new_indicator_id
        som_id_i.save()

        mr_after_new_mapping = MasterRefresh(self.user.id ,self.document.id)
        mr_after_new_mapping.main()

        dp_with_new_indicator = DataPoint.objects.filter(indicator_id = \
            new_indicator_id)

        dp_with_old_indicator = DataPoint.objects.filter(indicator_id = \
            first_indicator_id)

        ## did the new indicator flow through the system? ##
        self.assertEqual(1,len(dp_with_new_indicator))

        ## did the old indicator data get deleted?
        self.assertEqual(0,len(dp_with_old_indicator))
Example #21
    def test_campaign_data_ingest(self):
        # ./manage.py test rhizome.tests.test_refresh_master.RefreshMasterTestCase.test_campaign_data_ingest --settings=rhizome.settings.test

        self.set_up()
        test_file_location = 'allAccessData.csv'
        test_df = read_csv('rhizome/tests/_data/' + test_file_location)

        document = Document.objects.create(doc_title='allAccessData')
        document.docfile = test_file_location
        document.save()

        ## create location_meta ##
        distinct_location_codes = test_df['geocode'].unique()
        for l in distinct_location_codes:
            l_id = Location.objects.create(name=l,
                                           location_code=l,
                                           location_type_id=1,
                                           office_id=1).id
            l_som = SourceObjectMap.objects.create(master_object_id=l_id,
                                                   content_type='location',
                                                   source_object_code=str(l))

        ## create campaign meta ##
        distinct_campaign_codes = test_df['campaign'].unique()
        for i, c in enumerate(distinct_campaign_codes):
            c_id = Campaign.objects.create(name=c,
                                           top_lvl_location_id=1,
                                           top_lvl_indicator_tag_id=1,
                                           office_id=1,
                                           campaign_type_id=1,
                                           start_date='2010-01-0' + str(i + 1),
                                           end_date='2010-01-0' +
                                           str(i + 1)).id
            c_som = SourceObjectMap.objects.create(master_object_id=c_id,
                                                   content_type='campaign',
                                                   source_object_code=str(c))

        ## create indicator_meta ##
        access_indicator_id = Indicator.objects.create(name='access',
                                                       short_name='access').id

        som_obj = SourceObjectMap.objects.create(
            master_object_id=access_indicator_id,
            content_type='indicator',
            source_object_code='# Missed children due to inaccessibility (NEPI)'
        )

        dt = ComplexDocTransform(self.user.id, document.id)
        dt.main()

        mr = MasterRefresh(self.user.id, document.id)
        mr.main()

        ss_id_list = SourceSubmission.objects\
            .filter(document_id = document.id)\
            .values_list('id', flat=True)

        doc_dp_id_list = DocDataPoint.objects\
            .filter(source_submission_id__in = ss_id_list)\
            .values_list('id', flat=True)

        dp_id_list = DataPoint.objects\
            .filter(source_submission_id__in = ss_id_list)\
            .values_list('id', flat=True)

        self.assertEqual(len(ss_id_list), len(test_df))
        self.assertEqual(len(doc_dp_id_list), len(dp_id_list))