Python DocTransform Examples, source_data.etl_tasks.transform_upload.DocTransform Python Examples

Example #1

0

Show file

    def test_doc_to_source_submission(self):
        '''
        Check that processing the test csv yields one source submission per
        data row.

        Part of the set up method, this takes a csv and inserts it into the
        source submission table.  In the context of this test it represents
        what would happen when a user uploads a csv and the data flows through
        "etl_tasks/transform_upload".

        Uploading the csv to the server is itself a different task, so for now
        we perform "transform_upload" on the test file.

        This method is in charge of one specific thing: taking an input stream
        such as a csv, or an ODK submission, and creating one row in the
        database with the schema in which it was received.  Later in the
        ingest process, users are allowed to specify settings for each file in
        order to translate them into data the application can consume and
        visualize.

        The DocTransform method is responsible for the following:
            1. Inserting one record into source_submission for each csv row
            2. Inserting any new mappings into source_object_map
            3. Associating *all* source_object_maps with self.document_id (even
               those created in other documents)
            4. Inserting one record into submission_detail
        '''

        self.set_up()

        dt = DocTransform(self.user.id, self.document.id)
        source_submissions = dt.process_file()

        # Count the data rows of the same file on disk.  The context manager
        # guarantees the handle is closed even if iteration raises.
        test_file_path = settings.MEDIA_ROOT + self.test_file_location
        with open(test_file_path, 'r') as test_file:
            file_line_count = sum(1 for _ in test_file) - 1  # minus the header

        self.assertEqual(len(source_submissions), file_line_count)

Example #2

0

Show file

File: meta_data.py Project: SudoScience/rhizome

    def get_object_list(self, request):
        '''
        Run DocTransform.main() for the document named by the request's
        ``document_id`` query parameter, then return the matching Document
        rows via ``.values()``.

        A missing ``document_id`` parameter propagates whatever error
        ``request.GET[...]`` raises.
        '''
        requested_doc_id = request.GET['document_id']

        # NOTE(review): the first argument is a hard-coded 1 -- presumably a
        # user id; confirm against DocTransform's signature.
        transform = DocTransform(1, requested_doc_id)
        transform.main()

        matching_docs = Document.objects.filter(id=requested_doc_id)
        return matching_docs.values()

Example #3

0

Show file

File: test_transform_upload.py Project: unicef/polio

    def test_doc_to_source_submission(self):
        '''
        Check that processing the test csv yields one source submission per
        data row.

        Part of the set up method, this takes a csv and inserts it into the
        source submission table.  In the context of this test it represents
        what would happen when a user uploads a csv and the data flows through
        "etl_tasks/transform_upload".

        Uploading the csv to the server is itself a different task, so for now
        we perform "transform_upload" on the test file.

        This method is in charge of one specific thing: taking an input stream
        such as a csv, or an ODK submission, and creating one row in the
        database with the schema in which it was received.  Later in the
        ingest process, users are allowed to specify settings for each file in
        order to translate them into data the application can consume and
        visualize.

        The DocTransform method is responsible for the following:
            1. Inserting one record into source_submission for each csv row
            2. Inserting any new mappings into source_object_map
            3. Associating *all* source_object_maps with self.document_id (even
               those created in other documents)
            4. Inserting one record into source_submission_detail
        '''

        self.set_up()

        dt = DocTransform(
            self.user.id,
            self.document.id,
            self.test_file_location,
        )
        source_submissions = dt.process_file()

        # Count the data rows of the same file on disk.  The context manager
        # guarantees the handle is closed even if iteration raises.
        test_file_path = settings.MEDIA_ROOT + self.test_file_location
        with open(test_file_path, 'r') as test_file:
            file_line_count = sum(1 for _ in test_file) - 1  # minus the header

        self.assertEqual(len(source_submissions), file_line_count)

Example #4

0

Show file

File: test_refresh_master.py Project: unicef/polio

    def set_up(self):
        '''
        Build the fixtures used by the tests in this class.

        Stores the region names queryset and the test csv name, calls
        ``create_metadata()``, looks up the test user and the 'test' document,
        then runs ``DocTransform.process_file()`` and keeps its return value
        in ``self.source_submissions_ids``.
        '''
        all_regions = Region.objects.all()
        self.region_list = all_regions.values_list('name', flat=True)
        self.test_file_location = 'ebola_data.csv'

        self.create_metadata()

        self.user = User.objects.get(username='******')
        self.document = Document.objects.get(doc_title='test')

        transform = DocTransform(
            self.user.id,
            self.document.id,
            self.test_file_location,
        )
        self.source_submissions_ids = transform.process_file()

Example #5

0

Show file

File: views.py Project: marks/polio

def pre_process_file(request, document_id):
    '''
    Collect the column-mapping query parameters, run the DocTransform for
    ``document_id``, populate the document metadata and redirect to the
    "source_data:field_mapping" view for that document.

    The four ``*_col`` parameters are read from ``request.GET`` by subscript,
    so a missing one propagates the lookup error.
    '''
    mapping_keys = (
        "campaign_col",
        "value_col",
        "region_code_col",
        "indicator_col",
    )
    # Read in the listed order so the first missing key is the one that raises.
    column_mappings = {}
    for key in mapping_keys:
        column_mappings[key] = request.GET[key]

    transform = DocTransform(document_id, column_mappings)

    try:
        source_datapoints = transform.dp_df_to_source_datapoints()
    except IntegrityError:
        # Fall back to querying the datapoints for this document.  The result
        # is not used further in this function.
        source_datapoints = SourceDataPoint.objects.filter(
            document_id=document_id)

    populate_document_metadata(document_id)

    redirect_url = reverse(
        "source_data:field_mapping",
        kwargs={"document_id": document_id},
    )
    return HttpResponseRedirect(redirect_url)

Example #6

0

Show file

File: views.py Project: vivihuang/polio

def file_upload(request):
    '''
    Handle the document upload view.

    GET  -- render 'upload/file_upload.html' with an empty DocumentForm.
    POST -- create a Document from the uploaded 'docfile', run
            DocTransform.process_file() on it and redirect to the document's
            review overview page.

    NOTE(review): any other HTTP method falls through and returns None.
    '''
    # Declared but not consulted anywhere in this function.
    accepted_file_formats = ['.csv', '.xls', '.xlsx']

    if request.method == 'GET':
        template_context = {'form': DocumentForm()}
        return render_to_response(
            'upload/file_upload.html',
            template_context,
            context_instance=RequestContext(request),
        )

    if request.method == 'POST':
        uploader = request.user
        uploaded_file = request.FILES['docfile']
        newdoc = Document.objects.create(
            docfile=uploaded_file,
            created_by=uploader,
        )

        transform = DocTransform(newdoc.id)
        source_submissions = transform.process_file()

        overview_url = '/doc_review/overview/%s' % newdoc.id
        return HttpResponseRedirect(overview_url)

Example #7

0

Show file

File: test_refresh_master.py Project: vhpgomes/rhizome

    def set_up(self):
        '''
        Refresh master needs a few pieces of metadata to be able to do its
        job: Location, Campaign, User .. all of the main models that are
        initialized in the first migrations of the datapoints application.

        This set up also runs DocTransform, which simulates the upload of a
        csv or the processing of an ODK submission.  Ideally this test would
        run independently of that module, but for now this is how data is
        initialized in the system, via the .csv named below.
        '''
        self.test_file_location = 'ebola_data.csv'

        all_locations = Location.objects.all()
        self.location_list = all_locations.values_list('name', flat=True)

        self.create_metadata()
        self.user = User.objects.get(username='******')

        # Point the 'test' document at the csv before transforming it.
        test_document = Document.objects.get(doc_title='test')
        self.document = test_document
        test_document.docfile = self.test_file_location
        test_document.save()

        DocTransform(self.user.id, self.document.id).main()

Example #8

0

Show file

File: views.py Project: vivihuang/polio

def file_upload(request):
    '''
    Handle the document upload view.

    GET  -- render 'upload/file_upload.html' with an empty DocumentForm.
    POST -- create a Document from the uploaded 'docfile', run
            DocTransform.process_file() on it and redirect to the document's
            review overview page.

    NOTE(review): any other HTTP method falls through and returns None.
    '''
    # Declared but not consulted anywhere in this function.
    accepted_file_formats = ['.csv', '.xls', '.xlsx']

    http_method = request.method

    if http_method == 'GET':
        blank_form = DocumentForm()
        return render_to_response('upload/file_upload.html',
                                  {'form': blank_form},
                                  context_instance=RequestContext(request))

    if http_method == 'POST':
        author = request.user
        incoming_file = request.FILES['docfile']
        newdoc = Document.objects.create(docfile=incoming_file,
                                         created_by=author)

        source_submissions = DocTransform(newdoc.id).process_file()

        return HttpResponseRedirect('/doc_review/overview/%s' % newdoc.id)

Example #9

0

Show file

    def set_up(self):
        '''
        Refresh master needs a few pieces of metadata to be able to do its
        job: Location, Campaign, User .. all of the main models that are
        initialized in the first migrations of the datapoints application.

        This set up also runs DocTransform, which simulates the upload of a
        csv or the processing of an ODK submission.  Ideally this test would
        run independently of that module, but for now this is how data is
        initialized in the system, via the .csv named below.

        The value returned by ``process_file()`` is kept in
        ``self.source_submissions_ids``.
        '''
        self.test_file_location = 'ebola_data.csv'

        location_names = Location.objects.all().values_list('name', flat=True)
        self.location_list = location_names

        self.create_metadata()
        self.user = User.objects.get(username='******')

        # Point the 'test' document at the csv before transforming it.
        test_document = Document.objects.get(doc_title='test')
        self.document = test_document
        test_document.docfile = self.test_file_location
        test_document.save()

        transform = DocTransform(self.user.id, self.document.id)
        self.source_submissions_ids = transform.process_file()

Example #10

0

Show file

File: meta_data.py Project: atuljain/rhizome

    def get_object_list(self, request):
        '''
        Run DocTransform.main() for the document named by the request's
        ``document_id`` query parameter, on behalf of the requesting user,
        then return the matching Document rows via ``.values()``.

        A missing ``document_id`` parameter propagates whatever error
        ``request.GET[...]`` raises.
        '''
        requested_doc_id = request.GET['document_id']

        transform = DocTransform(request.user.id, requested_doc_id)
        transform.main()

        matching_docs = Document.objects.filter(id=requested_doc_id)
        return matching_docs.values()