Example #1
 def test_register_file(self, put_patch):
     put_patch.return_value = MockResponse({'registration_id': 'a1-b2-c3-d4-e1e10', 'url': 'http://*****:*****@phony.com', '*****@*****.**')
     self.assertEqual('a1-b2-c3-d4-e1e10', registration_id)
     self.assertEqual('http://somehost:82/download/a1-b2-c3-d4-e1e10',
                      download_url)
     self.assertEqual('http://something.com/web/as', web_download_url)
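The test above patches an HTTP PUT call and feeds register_file a canned registration payload through a MockResponse double. A minimal sketch of such a double follows; the class body and field names are assumptions for illustration, not the project's actual test helper, and they presume register_file reads the payload via .json().
class MockResponse(object):
    """Hypothetical stand-in for a requests-style response carrying a JSON body."""

    def __init__(self, json_body, status_code=201):
        self._json_body = json_body
        self.status_code = status_code

    def json(self):
        # register_file() would pull registration_id and the download URLs
        # out of this payload when the PUT call is patched in the test.
        return self._json_body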
def process_extraction_request(params, is_async=True):
    '''
    :param dict params: contains query parameters.  Value for each pair is expected to be a list
    :param bool is_async: True if the extract request should be processed asynchronously
    '''
    tasks = []
    response = {}
    task_responses = []
    filter_params = {}
    state_code = params[Constants.STATECODE][0]
    districts = params.get(Constants.DISTRICTGUID, [None])
    schools = params.get(Constants.SCHOOLGUID, [None])
    grades = params.get(Constants.ASMTGRADE, [None])
    request_id, user, tenant = processor.get_extract_request_user_info(state_code)

    # This is purely for file name conventions (for async extracts), consider refactoring
    is_tenant_level = is_async

    # Get filter related parameters
    if has_filters(params):
        filter_params = {k: v for k, v in params.items() if k in FILTERS_CONFIG}

    for district in districts:
        for school in schools:
            for grade in grades:
                for s in params[Constants.ASMTSUBJECT]:
                    for t in params[Constants.ASMTTYPE]:
                        param = merge_dict({Constants.ASMTSUBJECT: s,
                                            Constants.ASMTTYPE: t,
                                            Constants.ASMTYEAR: params[Constants.ASMTYEAR][0],
                                            Constants.STATECODE: state_code,
                                            Constants.SCHOOLGUID: school,
                                            Constants.DISTRICTGUID: district,
                                            Constants.ASMTGRADE: grade,
                                            Constants.STUDENTGUID: params.get(Constants.STUDENTGUID)}, filter_params)

                        task_response = {Constants.STATECODE: param[Constants.STATECODE],
                                         Constants.DISTRICTGUID: district,
                                         Constants.SCHOOLGUID: school,
                                         Extract.EXTRACTTYPE: ExtractType.studentAssessment,
                                         Constants.ASMTSUBJECT: param[Constants.ASMTSUBJECT],
                                         Constants.ASMTTYPE: param[Constants.ASMTTYPE],
                                         Constants.ASMTYEAR: param[Constants.ASMTYEAR],
                                         Extract.REQUESTID: request_id}

                        # separate by grades if no grade is specified
                        __tasks, __task_responses = _create_tasks_with_responses(request_id, user, tenant, param, task_response, is_tenant_level=is_tenant_level)
                        tasks += __tasks
                        task_responses += __task_responses
    if is_async:
        response['tasks'] = task_responses
        if tasks:
            response[Constants.FILES] = []
            files = {}
            archive_file_name = processor.get_archive_file_path(user.get_uid(), tenant, request_id)
            files[Constants.FILENAME] = os.path.basename(archive_file_name)
            directory_to_archive = processor.get_extract_work_zone_path(tenant, request_id)

            # Register extract file with HPZ.
            registration_id, download_url, web_download_url = register_file(user.get_uid(), user.get_email())

            files[Constants.DOWNLOAD_URL] = download_url
            files[Constants.WEB_DOWNLOAD_URL] = web_download_url

            response[Constants.FILES].append(files)

            queue = get_current_registry().settings.get('extract.job.queue.async', TaskConstants.DEFAULT_QUEUE_NAME)
            start_extract(tenant, request_id, [archive_file_name], [directory_to_archive], [registration_id], tasks, queue=queue)
        return response
    else:
        if tasks:
            settings = get_current_registry().settings
            queue = settings.get('extract.job.queue.sync', TaskConstants.SYNC_QUEUE_NAME)
            archive_queue = settings.get('extract.job.queue.archive', TaskConstants.ARCHIVE_QUEUE_NAME)
            directory_to_archive = processor.get_extract_work_zone_path(tenant, request_id)
            celery_timeout = int(get_current_registry().settings.get('extract.celery_timeout', '30'))
            # Synchronous calls to generate json and csv and then to archive
            # BUG, it still routes to 'extract' queue due to chain
    #        result = chain(prepare_path.subtask(args=[tenant, request_id, [directory_to_archive]], queue=queue, immutable=True),      # @UndefinedVariable
    #                       route_tasks(tenant, request_id, tasks, queue_name=queue),
    #                       archive.subtask(args=[request_id, directory_to_archive], queue=archive_queue, immutable=True)).delay()
            prepare_path.apply_async(args=[request_id, [directory_to_archive]], queue=queue, immutable=True).get(timeout=celery_timeout)  # @UndefinedVariable
            generate_extract_file_tasks(tenant, request_id, tasks, queue_name=queue)().get(timeout=celery_timeout)
            content = archive_with_stream.apply_async(args=[request_id, directory_to_archive], queue=archive_queue, immutable=True).get(timeout=celery_timeout)
            clean_up.apply_async(args=[get_extract_request_base_path(tenant, request_id)], queue=queue)  # @UndefinedVariable
            return content
        else:
            raise NotFoundException("There are no results")
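As a rough illustration of the shape process_extraction_request expects, here is a hedged sketch of a request payload; the Constants keys mirror the lookups in the code above, while every literal value is invented.
sample_params = {
    Constants.STATECODE: ['NC'],              # required; first element is used
    Constants.ASMTYEAR: ['2016'],             # required; first element is used
    Constants.ASMTSUBJECT: ['Math', 'ELA'],   # required; one task set per subject
    Constants.ASMTTYPE: ['SUMMATIVE'],        # required; one task set per type
    Constants.DISTRICTGUID: ['229'],          # optional; defaults to [None]
    Constants.SCHOOLGUID: ['936'],            # optional; defaults to [None]
    Constants.ASMTGRADE: ['03'],              # optional; defaults to [None]
}
# One extract task is created per (district, school, grade, subject, asmt type)
# combination; async requests also register an archive file with HPZ.
response = process_extraction_request(sample_params, is_async=True)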
def process_async_item_or_raw_extraction_request(params, extract_type):
    '''
    :param dict params: contains query parameters.  Value for each pair is expected to be a list
    :param extract_type: the type of extract requested (item level or raw data)
    '''
    queue = get_current_registry().settings.get('extract.job.queue.async', TaskConstants.DEFAULT_QUEUE_NAME)
    soft_limit = int(get_current_registry().settings.get('extract.partial_file.size.soft_limit', '-1'))
    if extract_type is ExtractType.itemLevel:
        average_size = int(get_current_registry().settings.get('extract.partial_file.size.average.csv', '-1'))
    else:
        average_size = int(get_current_registry().settings.get('extract.partial_file.size.average.xml', '-1'))
    data_path_config_key = 'extract.item_level_base_dir' if extract_type is ExtractType.itemLevel else 'extract.raw_data_base_dir'
    root_dir = get_current_registry().settings.get(data_path_config_key)
    response = {}
    state_code = params[Constants.STATECODE]
    request_id, user, tenant = processor.get_extract_request_user_info(state_code)
    extract_params = copy.deepcopy(params)
    base_directory_to_archive = processor.get_extract_work_zone_path(tenant, request_id)

    # get an estimate for the number of extract files that need to be created based on the params
    # parts = estimate_extract_files(params=params, extract_type=extract_type)

    # temporary hack until the estimator is fixed; remove and substitute with the line above
    estimated_total_files = 1
    estimated_total_size = estimate_extract_total_file_size(params, average_size, extract_type)

    # No data available
    if estimated_total_size == 0:
        task_response = {}
        task_response[Extract.STATUS] = Extract.NO_DATA
        task_response[Extract.MESSAGE] = "Data is not available"
        response['tasks'] = [task_response]
    else:
        if soft_limit > 0:
            estimated_total_files = int(estimated_total_size / soft_limit)
            if estimated_total_size % soft_limit > 0:
                estimated_total_files += 1

        out_file_names = []
        directories_to_archive = []
        extract_files = []
        archive_files = []
        registration_ids = []

        for estimated_total_file in range(estimated_total_files):
            extract_file = {}
            if extract_type is ExtractType.itemLevel:
                out_file_names.append(get_items_extract_file_path(extract_params, tenant, request_id, partial_no=estimated_total_file if estimated_total_files > 1 else None))
            if estimated_total_files > 1:
                directories_to_archive.append(os.path.join(base_directory_to_archive, 'part' + str(estimated_total_file)))
                archive_file_name = processor.get_archive_file_path(user.get_uid(), tenant, request_id, partial_no=estimated_total_file)
            else:
                directories_to_archive.append(base_directory_to_archive)
                archive_file_name = processor.get_archive_file_path(user.get_uid(), tenant, request_id)
            archive_files.append(archive_file_name)
            registration_id, download_url, web_download_url = register_file(user.get_uid(), user.get_email())
            registration_ids.append(registration_id)
            extract_file[Constants.FILENAME] = os.path.basename(archive_file_name)
            extract_file[Constants.DOWNLOAD_URL] = download_url
            extract_file[Constants.WEB_DOWNLOAD_URL] = web_download_url
            extract_files.append(extract_file)

        tasks, task_responses = _create_item_or_raw_tasks_with_responses(request_id, user, extract_params, root_dir, out_file_names, directories_to_archive, extract_type)
        response['tasks'] = task_responses
        response['files'] = extract_files
        start_extract(tenant, request_id, archive_files, directories_to_archive, registration_ids, tasks, queue=queue)
    return response
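The partial-file count computed above is effectively a ceiling division of the estimated extract size by the configured soft limit; a small worked example with made-up numbers:
# Hypothetical numbers: soft limit of 100 units, estimated total size of 250 units.
soft_limit = 100
estimated_total_size = 250

estimated_total_files = int(estimated_total_size / soft_limit)   # 2 full parts
if estimated_total_size % soft_limit > 0:
    estimated_total_files += 1                                    # plus 1 partial part
assert estimated_total_files == 3                                 # ceil(250 / 100)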
def process_extraction_request(params, is_async=True):
    '''
    :param dict params: contains query parameters.  Value for each pair is expected to be a list
    :param bool is_async: True if the extract request should be processed asynchronously
    '''
    tasks = []
    response = {}
    task_responses = []
    filter_params = {}
    state_code = params[Constants.STATECODE][0]
    districts = params.get(Constants.DISTRICTGUID, [None])
    schools = params.get(Constants.SCHOOLGUID, [None])
    grades = params.get(Constants.ASMTGRADE, [None])
    request_id, user, tenant = processor.get_extract_request_user_info(
        state_code)

    # This is purely for file name conventions (for async extracts), consider refactoring
    is_tenant_level = is_async

    # Get filter related parameters
    if has_filters(params):
        filter_params = {
            k: v
            for k, v in params.items() if k in FILTERS_CONFIG
        }

    for district in districts:
        for school in schools:
            for grade in grades:
                for s in params[Constants.ASMTSUBJECT]:
                    for t in params[Constants.ASMTTYPE]:
                        param = merge_dict(
                            {Constants.ASMTSUBJECT: s,
                             Constants.ASMTTYPE: t,
                             Constants.ASMTYEAR: params[Constants.ASMTYEAR][0],
                             Constants.STATECODE: state_code,
                             Constants.SCHOOLGUID: school,
                             Constants.DISTRICTGUID: district,
                             Constants.ASMTGRADE: grade,
                             Constants.STUDENTGUID: params.get(Constants.STUDENTGUID)},
                            filter_params)

                        task_response = {
                            Constants.STATECODE: param[Constants.STATECODE],
                            Constants.DISTRICTGUID: district,
                            Constants.SCHOOLGUID: school,
                            Extract.EXTRACTTYPE: ExtractType.studentAssessment,
                            Constants.ASMTSUBJECT: param[Constants.ASMTSUBJECT],
                            Constants.ASMTTYPE: param[Constants.ASMTTYPE],
                            Constants.ASMTYEAR: param[Constants.ASMTYEAR],
                            Extract.REQUESTID: request_id
                        }

                        # separate by grades if no grade is specified
                        __tasks, __task_responses = _create_tasks_with_responses(
                            request_id,
                            user,
                            tenant,
                            param,
                            task_response,
                            is_tenant_level=is_tenant_level)
                        tasks += __tasks
                        task_responses += __task_responses
    if is_async:
        response['tasks'] = task_responses
        if tasks:
            response[Constants.FILES] = []
            files = {}
            archive_file_name = processor.get_archive_file_path(
                user.get_uid(), tenant, request_id)
            files[Constants.FILENAME] = os.path.basename(archive_file_name)
            directory_to_archive = processor.get_extract_work_zone_path(
                tenant, request_id)

            # Register extract file with HPZ.
            registration_id, download_url, web_download_url = register_file(
                user.get_uid(), user.get_email())

            files[Constants.DOWNLOAD_URL] = download_url
            files[Constants.WEB_DOWNLOAD_URL] = web_download_url

            response[Constants.FILES].append(files)

            queue = get_current_registry().settings.get(
                'extract.job.queue.async', TaskConstants.DEFAULT_QUEUE_NAME)
            start_extract(tenant,
                          request_id, [archive_file_name],
                          [directory_to_archive], [registration_id],
                          tasks,
                          queue=queue)
        return response
    else:
        if tasks:
            settings = get_current_registry().settings
            queue = settings.get('extract.job.queue.sync',
                                 TaskConstants.SYNC_QUEUE_NAME)
            archive_queue = settings.get('extract.job.queue.archive',
                                         TaskConstants.ARCHIVE_QUEUE_NAME)
            directory_to_archive = processor.get_extract_work_zone_path(
                tenant, request_id)
            celery_timeout = int(get_current_registry().settings.get(
                'extract.celery_timeout', '30'))
            # Synchronous calls to generate json and csv and then to archive
            # BUG, it still routes to 'extract' queue due to chain
            #        result = chain(prepare_path.subtask(args=[tenant, request_id, [directory_to_archive]], queue=queue, immutable=True),      # @UndefinedVariable
            #                       route_tasks(tenant, request_id, tasks, queue_name=queue),
            #                       archive.subtask(args=[request_id, directory_to_archive], queue=archive_queue, immutable=True)).delay()
            prepare_path.apply_async(
                args=[request_id, [directory_to_archive]],
                queue=queue,
                immutable=True).get(
                    timeout=celery_timeout)  # @UndefinedVariable
            generate_extract_file_tasks(
                tenant, request_id, tasks,
                queue_name=queue)().get(timeout=celery_timeout)
            content = archive_with_stream.apply_async(
                args=[request_id, directory_to_archive],
                queue=archive_queue,
                immutable=True).get(timeout=celery_timeout)
            clean_up.apply_async(
                args=[get_extract_request_base_path(tenant, request_id)],
                queue=queue)  # @UndefinedVariable
            return content
        else:
            raise NotFoundException("There are no results")
def process_async_item_or_raw_extraction_request(params, extract_type):
    '''
    :param dict params: contains query parameters.  Value for each pair is expected to be a list
    :param extract_type: the type of extract requested (item level or raw data)
    '''
    queue = get_current_registry().settings.get(
        'extract.job.queue.async', TaskConstants.DEFAULT_QUEUE_NAME)
    soft_limit = int(get_current_registry().settings.get(
        'extract.partial_file.size.soft_limit', '-1'))
    if extract_type is ExtractType.itemLevel:
        average_size = int(get_current_registry().settings.get(
            'extract.partial_file.size.average.csv', '-1'))
    else:
        average_size = int(get_current_registry().settings.get(
            'extract.partial_file.size.average.xml', '-1'))
    data_path_config_key = 'extract.item_level_base_dir' if extract_type is ExtractType.itemLevel else 'extract.raw_data_base_dir'
    root_dir = get_current_registry().settings.get(data_path_config_key)
    response = {}
    state_code = params[Constants.STATECODE]
    request_id, user, tenant = processor.get_extract_request_user_info(
        state_code)
    extract_params = copy.deepcopy(params)
    base_directory_to_archive = processor.get_extract_work_zone_path(
        tenant, request_id)

    # get an estimate for the number of extract files that need to be created based on the params
    # parts = estimate_extract_files(params=params, extract_type=extract_type)

    # temporary hack until the estimator is fixed; remove and substitute with the line above
    estimated_total_files = 1
    estimated_total_size = estimate_extract_total_file_size(
        params, average_size, extract_type)

    # No data available
    if estimated_total_size == 0:
        task_response = {}
        task_response[Extract.STATUS] = Extract.NO_DATA
        task_response[Extract.MESSAGE] = "Data is not available"
        response['tasks'] = [task_response]
    else:
        if soft_limit > 0:
            estimated_total_files = int(estimated_total_size / soft_limit)
            if estimated_total_size % soft_limit > 0:
                estimated_total_files += 1

        out_file_names = []
        directories_to_archive = []
        extract_files = []
        archive_files = []
        registration_ids = []

        for estimated_total_file in range(estimated_total_files):
            extract_file = {}
            if extract_type is ExtractType.itemLevel:
                out_file_names.append(
                    get_items_extract_file_path(
                        extract_params,
                        tenant,
                        request_id,
                        partial_no=estimated_total_file
                        if estimated_total_files > 1 else None))
            if estimated_total_files > 1:
                directories_to_archive.append(
                    os.path.join(base_directory_to_archive,
                                 'part' + str(estimated_total_file)))
                archive_file_name = processor.get_archive_file_path(
                    user.get_uid(),
                    tenant,
                    request_id,
                    partial_no=estimated_total_file)
            else:
                directories_to_archive.append(base_directory_to_archive)
                archive_file_name = processor.get_archive_file_path(
                    user.get_uid(), tenant, request_id)
            archive_files.append(archive_file_name)
            registration_id, download_url, web_download_url = register_file(
                user.get_uid(), user.get_email())
            registration_ids.append(registration_id)
            extract_file[Constants.FILENAME] = os.path.basename(
                archive_file_name)
            extract_file[Constants.DOWNLOAD_URL] = download_url
            extract_file[Constants.WEB_DOWNLOAD_URL] = web_download_url
            extract_files.append(extract_file)

        tasks, task_responses = _create_item_or_raw_tasks_with_responses(
            request_id, user, extract_params, root_dir, out_file_names,
            directories_to_archive, extract_type)
        response['tasks'] = task_responses
        response['files'] = extract_files
        start_extract(tenant,
                      request_id,
                      archive_files,
                      directories_to_archive,
                      registration_ids,
                      tasks,
                      queue=queue)
    return response
Example #6
def process_extraction_request(params):
    """
    @param params: Extract request parameters

    @return:  Extract response
    """

    queue = get_current_registry().settings.get(
        'extract.job.queue.async', TaskConstants.DEFAULT_QUEUE_NAME)
    response = {}
    extract_file = {}
    state_code = params[EndpointConstants.STATECODE]
    request_id, user, tenant = processor.get_extract_request_user_info(
        state_code)

    extract_type = params[Extract.EXTRACTTYPE]
    extraction_data_type = ''
    if extract_type == ExtractType.studentRegistrationStatistics:
        extraction_data_type = ExtractionDataType.SR_STATISTICS
    elif extract_type == ExtractType.studentAssessmentCompletion:
        extraction_data_type = ExtractionDataType.SR_COMPLETION

    extract_params = {
        TaskConstants.STATE_CODE: state_code,
        TaskConstants.ACADEMIC_YEAR: params[EndpointConstants.ACADEMIC_YEAR],
        Extract.REPORT_TYPE: extract_type,
        TaskConstants.EXTRACTION_DATA_TYPE: extraction_data_type
    }

    task_response = {
        TaskConstants.STATE_CODE: extract_params[TaskConstants.STATE_CODE],
        TaskConstants.ACADEMIC_YEAR: extract_params[TaskConstants.ACADEMIC_YEAR],
        Extract.EXTRACTTYPE: extract_params[Extract.REPORT_TYPE],
        Extract.REQUESTID: request_id,
        Extract.STATUS: Extract.OK
    }

    task_info = _create_task_info(request_id, user, tenant, extract_params)

    response['tasks'] = [task_response]

    archived_file_path = processor.get_archive_file_path(
        user.get_uid(), tenant, request_id)
    extract_file['fileName'] = os.path.basename(archived_file_path)

    data_directory_to_archive = processor.get_extract_work_zone_path(
        tenant, request_id)

    # Register extract file with HPZ.
    registration_id, download_url, web_download_url = register_file(
        user.get_uid(), user.get_email())
    extract_file[EndpointConstants.DOWNLOAD_URL] = download_url
    extract_file[EndpointConstants.WEB_DOWNLOAD_URL] = web_download_url
    response[EndpointConstants.FILES] = [extract_file]

    start_extract(tenant,
                  request_id, [archived_file_path],
                  [data_directory_to_archive], [registration_id], [task_info],
                  queue=queue)

    return response
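For orientation, a hedged sketch of how this student-registration extract function might be invoked; the keys follow the lookups in the function above and the literal values are invented.
sample_params = {
    EndpointConstants.STATECODE: 'NC',
    EndpointConstants.ACADEMIC_YEAR: 2016,
    Extract.EXTRACTTYPE: ExtractType.studentRegistrationStatistics,
}
response = process_extraction_request(sample_params)
# response['tasks'][0] echoes the request (state code, academic year, extract type,
# request id, OK status) and response[EndpointConstants.FILES][0] carries the
# archive file name plus the HPZ download and web download URLs.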
 def test_register_file(self, put_patch):
     put_patch.return_value = MockResponse({'registration_id': 'a1-b2-c3-d4-e1e10', 'url': 'http://*****:*****@phony.com', '*****@*****.**')
     self.assertEqual('a1-b2-c3-d4-e1e10', registration_id)
     self.assertEqual('http://somehost:82/download/a1-b2-c3-d4-e1e10', download_url)
     self.assertEqual('http://something.com/web/as', web_download_url)
Example #8
def get_bulk_pdf_content(settings, pdf_base_dir, base_url, subprocess_timeout, student_ids, grades,
                         state_code, district_id, school_id, asmt_type, asmt_year, lang,
                         is_grayscale, always_generate, celery_timeout, params):
    '''
    Read pdf content from file system if it exists, else generate it

    :param params: python dict that contains query parameters from the request
    '''
    # Get the user
    user = authenticated_userid(get_current_request())

    # If we do not have a list of student GUIDs, we need to get it
    all_guids, guids_by_grade = _create_student_ids(student_ids, grades, state_code,
                                                    district_id, school_id, asmt_type,
                                                    asmt_year, params)

    # Get all file names
    date_taken = None
    files_by_student_id = generate_isr_report_path_by_student_id(state_code, date_taken, asmt_year,
                                                                 pdf_report_base_dir=pdf_base_dir,
                                                                 student_ids=all_guids,
                                                                 asmt_type=asmt_type,
                                                                 grayScale=is_grayscale,
                                                                 lang=lang)

    # Set up a few additional variables
    urls_by_student_id = _create_urls_by_student_id(all_guids, state_code, base_url, params, files_by_student_id)

    # Register expected file with HPZ
    registration_id, download_url, web_download_url = register_file(user.get_uid(), user.get_email())

    # Get the name of the school
    school_name = _get_school_name(state_code, district_id, school_id)

    # Set up directory and file names
    directory_to_archive = os.path.join(pdf_base_dir, Constants.BULK, registration_id, Constants.DATA)
    directory_for_merged_pdfs = os.path.join(pdf_base_dir, Constants.BULK, registration_id, Constants.MERGED)
    directory_for_cover_sheets = os.path.join(pdf_base_dir, Constants.BULK, registration_id, Constants.COVER)
    directory_for_zip = os.path.join(pdf_base_dir, Constants.BULK, registration_id, Constants.ZIP)
    archive_file_name = _get_archive_name(school_name, lang, is_grayscale)
    archive_file_path = os.path.join(directory_for_zip, archive_file_name)

    # Create JSON response
    response = {
        Constants.FILES: [
            {
                Constants.FILENAME: archive_file_name,
                Constants.DOWNLOAD_URL: download_url,
                Constants.WEB_DOWNLOAD_URL: web_download_url
            }
        ]
    }

    # Generate cookie
    pdfGenerator = PDFGenerator(settings)

    # Create the tasks for each individual student PDF file we want to merge
    generate_tasks = _create_pdf_generate_tasks(pdfGenerator.cookie_value,
                                                pdfGenerator.cookie_name,
                                                is_grayscale,
                                                always_generate,
                                                files_by_student_id,
                                                urls_by_student_id)

    # Create the tasks to merge each PDF by grade
    merge_tasks, merged_pdfs_by_grade, student_report_count_by_pdf = _create_pdf_merge_tasks(
        pdf_base_dir,
        directory_for_merged_pdfs,
        guids_by_grade,
        files_by_student_id,
        school_name,
        lang,
        is_grayscale
    )

    # Get metadata for tenant branding
    custom_metadata = get_custom_metadata(state_code)

    # Create tasks for cover sheets
    cover_sheet_tasks, cover_sheets_by_grade = _create_cover_sheet_generate_tasks(
        pdfGenerator.cookie_value,
        pdfGenerator.cookie_name,
        is_grayscale, school_name,
        user._User__info['name']['fullName'],
        custom_metadata,
        directory_for_cover_sheets,
        merged_pdfs_by_grade,
        student_report_count_by_pdf
    )

    # Create tasks to merge in cover sheets
    merge_covers_tasks = _create_pdf_cover_merge_tasks(merged_pdfs_by_grade, cover_sheets_by_grade,
                                                       directory_to_archive, pdf_base_dir)

    # Start the bulk merge
    _start_bulk(archive_file_path, directory_to_archive, registration_id, generate_tasks,
                merge_tasks, cover_sheet_tasks, merge_covers_tasks, pdf_base_dir)

    # Return the JSON response while the bulk merge runs asynchronously
    return Response(body=json.dumps(response), content_type=Constants.APPLICATION_JSON)
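A hedged sketch of the per-registration working tree that the bulk PDF flow above lays out; the lowercase directory names are assumptions standing in for Constants.BULK, DATA, MERGED, COVER and ZIP, and the base directory and registration id are invented.
import os

pdf_base_dir = '/opt/edware/pdf'          # hypothetical
registration_id = 'a1-b2-c3-d4'           # in practice returned by register_file()
bulk_root = os.path.join(pdf_base_dir, 'bulk', registration_id)

working_dirs = {
    'data': os.path.join(bulk_root, 'data'),      # individual student PDFs to merge
    'merged': os.path.join(bulk_root, 'merged'),  # one merged PDF per grade
    'cover': os.path.join(bulk_root, 'cover'),    # generated cover sheets
    'zip': os.path.join(bulk_root, 'zip'),        # final archive registered with HPZ
}
# The JSON response is returned immediately; _start_bulk() runs the
# generate -> merge -> cover-sheet -> archive pipeline asynchronously.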
Example #9
def get_bulk_pdf_content(settings, pdf_base_dir, base_url, subprocess_timeout,
                         student_ids, grades, state_code, district_id,
                         school_id, asmt_type, asmt_year, lang, is_grayscale,
                         always_generate, celery_timeout, params):
    '''
    Read pdf content from file system if it exists, else generate it

    :param params: python dict that contains query parameters from the request
    '''
    # Get the user
    user = authenticated_userid(get_current_request())

    # If we do not have a list of student GUIDs, we need to get it
    all_guids, guids_by_grade = _create_student_ids(student_ids, grades,
                                                    state_code, district_id,
                                                    school_id, asmt_type,
                                                    asmt_year, params)

    # Get all file names
    date_taken = None
    files_by_student_id = generate_isr_report_path_by_student_id(
        state_code,
        date_taken,
        asmt_year,
        pdf_report_base_dir=pdf_base_dir,
        student_ids=all_guids,
        asmt_type=asmt_type,
        grayScale=is_grayscale,
        lang=lang)

    # Set up a few additional variables
    urls_by_student_id = _create_urls_by_student_id(all_guids, state_code,
                                                    base_url, params,
                                                    files_by_student_id)

    # Register expected file with HPZ
    registration_id, download_url, web_download_url = register_file(
        user.get_uid(), user.get_email())

    # Get the name of the school
    school_name = _get_school_name(state_code, district_id, school_id)

    # Set up directory and file names
    directory_to_archive = os.path.join(pdf_base_dir, Constants.BULK,
                                        registration_id, Constants.DATA)
    directory_for_merged_pdfs = os.path.join(pdf_base_dir, Constants.BULK,
                                             registration_id, Constants.MERGED)
    directory_for_cover_sheets = os.path.join(pdf_base_dir, Constants.BULK,
                                              registration_id, Constants.COVER)
    directory_for_zip = os.path.join(pdf_base_dir, Constants.BULK,
                                     registration_id, Constants.ZIP)
    archive_file_name = _get_archive_name(school_name, lang, is_grayscale)
    archive_file_path = os.path.join(directory_for_zip, archive_file_name)

    # Create JSON response
    response = {
        Constants.FILES: [{
            Constants.FILENAME: archive_file_name,
            Constants.DOWNLOAD_URL: download_url,
            Constants.WEB_DOWNLOAD_URL: web_download_url
        }]
    }

    # Generate cookie
    pdfGenerator = PDFGenerator(settings)

    # Create the tasks for each individual student PDF file we want to merge
    generate_tasks = _create_pdf_generate_tasks(pdfGenerator.cookie_value,
                                                pdfGenerator.cookie_name,
                                                is_grayscale, always_generate,
                                                files_by_student_id,
                                                urls_by_student_id)

    # Create the tasks to merge each PDF by grade
    merge_tasks, merged_pdfs_by_grade, student_report_count_by_pdf = _create_pdf_merge_tasks(
        pdf_base_dir, directory_for_merged_pdfs, guids_by_grade,
        files_by_student_id, school_name, lang, is_grayscale)

    # Get metadata for tenant branding
    custom_metadata = get_custom_metadata(state_code)

    # Create tasks for cover sheets
    cover_sheet_tasks, cover_sheets_by_grade = _create_cover_sheet_generate_tasks(
        pdfGenerator.cookie_value, pdfGenerator.cookie_name, is_grayscale,
        school_name, user._User__info['name']['fullName'], custom_metadata,
        directory_for_cover_sheets, merged_pdfs_by_grade,
        student_report_count_by_pdf)

    # Create tasks to merge in cover sheets
    merge_covers_tasks = _create_pdf_cover_merge_tasks(merged_pdfs_by_grade,
                                                       cover_sheets_by_grade,
                                                       directory_to_archive,
                                                       pdf_base_dir)

    # Start the bulk merge
    _start_bulk(archive_file_path, directory_to_archive, registration_id,
                generate_tasks, merge_tasks, cover_sheet_tasks,
                merge_covers_tasks, pdf_base_dir)

    # Return the JSON response while the bulk merge runs asynchronously
    return Response(body=json.dumps(response),
                    content_type=Constants.APPLICATION_JSON)