def _generate_common_message(guid_batch, load_type, file_parts, archive_file, lzw, initial_msg):
    """Assemble the message shared by all batch-pipeline tasks.

    Builds the common key/value pairs (batch guid, load type, file parts,
    start timestamp, input archive path, landing-zone work dir) and merges
    them over *initial_msg*, which may be None.
    """
    base = {} if initial_msg is None else initial_msg
    common = {
        mk.GUID_BATCH: guid_batch,
        mk.LOAD_TYPE: load_type,
        mk.PARTS: file_parts,
        mk.START_TIMESTAMP: datetime.datetime.now(),
        mk.INPUT_FILE_PATH: archive_file,
        mk.LANDING_ZONE_WORK_DIR: lzw,
    }
    return merge_dict(base, common)
def get_load_from_csv_tasks(msg):
    '''
    Return a chord of tasks that migrate the split csv files to staging.
    '''
    guid_batch = msg[mk.GUID_BATCH]
    work_dir = msg[mk.LANDING_ZONE_WORK_DIR]
    header_path = msg[mk.HEADER_FILE_PATH]
    load_type = msg[mk.LOAD_TYPE]
    # One loader subtask per split file; each subtask's message is the
    # per-file loader message merged over the incoming batch message.
    loader_tasks = [
        W_load_csv_to_staging.task.subtask(
            args=[merge_dict(generate_msg_for_file_loader(file_tuple, header_path, work_dir, guid_batch, load_type), msg)])
        for file_tuple in msg[mk.SPLIT_FILE_LIST]
    ]
    return chord(group(loader_tasks), handle_group_results.s())
def insert_extract_stats(*dict_values):
    '''
    Insert one row into the extract stats table.

    A TIMESTAMP column (current UTC time, naive datetime) is always added;
    each supplied dictionary is then merged in.  merge_dict favors its first
    argument, so keys from later dictionaries override earlier ones and the
    default timestamp — presumably intentional; confirm against merge_dict.

    :params dict values: one or more dictionaries of values to insert
        ex. dict_values = {Constants.TENANT: "tenantName", Constants.STATE_CODE: "EX"}, {Constants.OTHER: "test"}
    '''
    values = {Constants.TIMESTAMP: datetime.utcnow()}
    for d in dict_values:
        # merge_dict(d, values): keys in d take precedence over the accumulated values
        values = merge_dict(d, values)
    with StatsDBConnection() as connector:
        extract_stats = connector.get_table(Constants.EXTRACT_STATS)
        stmt = extract_stats.insert(values)
        connector.execute(stmt)
def get_load_from_csv_tasks(msg):
    '''
    Return a chord of tasks that load the split csv files into staging.
    '''
    batch_guid = msg[mk.GUID_BATCH]
    landing_zone_dir = msg[mk.LANDING_ZONE_WORK_DIR]
    header = msg[mk.HEADER_FILE_PATH]
    kind = msg[mk.LOAD_TYPE]
    subtasks = []
    for entry in msg[mk.SPLIT_FILE_LIST]:
        # Per-file loader message, merged over the incoming batch message.
        loader_msg = generate_msg_for_file_loader(entry, header, landing_zone_dir, batch_guid, kind)
        subtasks.append(W_load_csv_to_staging.task.subtask(args=[merge_dict(loader_msg, msg)]))
    return chord(group(subtasks), handle_group_results.s())
def process_extraction_request(params, is_async=True):
    '''
    Build and dispatch the extract tasks for a student-assessment extract
    request, covering every district x school x grade x subject x
    assessment-type combination found in *params*.

    :param dict params: contains query parameter.  Value for each pair is expected to be a list
    :param bool is_async: True queues the extract asynchronously (HPZ file
        registration, immediate response); False runs prepare/generate/archive
        synchronously and returns the archived content
    :returns: dict with task responses (and file info) when async; the
        archived content when synchronous
    :raises NotFoundException: synchronous mode only, when no tasks were built
    '''
    tasks = []
    response = {}
    task_responses = []
    filter_params = {}
    state_code = params[Constants.STATECODE][0]
    # district/school/grade are optional; [None] keeps each nested loop running once
    districts = params.get(Constants.DISTRICTGUID, [None])
    schools = params.get(Constants.SCHOOLGUID, [None])
    grades = params.get(Constants.ASMTGRADE, [None])
    request_id, user, tenant = processor.get_extract_request_user_info(state_code)
    # This is purely for file name conventions (for async extracts), consider refactoring
    is_tenant_level = is_async
    # Get filter related parameters
    if has_filters(params):
        filter_params = {k: v for k, v in params.items() if k in FILTERS_CONFIG}
    # One task set per combination of the request dimensions
    for district in districts:
        for school in schools:
            for grade in grades:
                for s in params[Constants.ASMTSUBJECT]:
                    for t in params[Constants.ASMTTYPE]:
                        param = merge_dict({Constants.ASMTSUBJECT: s,
                                            Constants.ASMTTYPE: t,
                                            Constants.ASMTYEAR: params[Constants.ASMTYEAR][0],
                                            Constants.STATECODE: state_code,
                                            Constants.SCHOOLGUID: school,
                                            Constants.DISTRICTGUID: district,
                                            Constants.ASMTGRADE: grade,
                                            Constants.STUDENTGUID: params.get(Constants.STUDENTGUID)},
                                           filter_params)
                        # Echo of the request dimensions returned to the caller
                        task_response = {Constants.STATECODE: param[Constants.STATECODE],
                                         Constants.DISTRICTGUID: district,
                                         Constants.SCHOOLGUID: school,
                                         Extract.EXTRACTTYPE: ExtractType.studentAssessment,
                                         Constants.ASMTSUBJECT: param[Constants.ASMTSUBJECT],
                                         Constants.ASMTTYPE: param[Constants.ASMTTYPE],
                                         Constants.ASMTYEAR: param[Constants.ASMTYEAR],
                                         Extract.REQUESTID: request_id}
                        # separate by grades if no grade is specified
                        __tasks, __task_responses = _create_tasks_with_responses(request_id, user, tenant, param,
                                                                                 task_response,
                                                                                 is_tenant_level=is_tenant_level)
                        tasks += __tasks
                        task_responses += __task_responses
    if is_async:
        response['tasks'] = task_responses
        if tasks:
            response[Constants.FILES] = []
            files = {}
            archive_file_name = processor.get_archive_file_path(user.get_uid(), tenant, request_id)
            files[Constants.FILENAME] = os.path.basename(archive_file_name)
            directory_to_archive = processor.get_extract_work_zone_path(tenant, request_id)
            # Register extract file with HPZ.
            registration_id, download_url, web_download_url = register_file(user.get_uid(), user.get_email())
            files[Constants.DOWNLOAD_URL] = download_url
            files[Constants.WEB_DOWNLOAD_URL] = web_download_url
            response[Constants.FILES].append(files)
            queue = get_current_registry().settings.get('extract.job.queue.async', TaskConstants.DEFAULT_QUEUE_NAME)
            start_extract(tenant, request_id, [archive_file_name], [directory_to_archive], [registration_id], tasks,
                          queue=queue)
        return response
    else:
        if tasks:
            settings = get_current_registry().settings
            queue = settings.get('extract.job.queue.sync', TaskConstants.SYNC_QUEUE_NAME)
            archive_queue = settings.get('extract.job.queue.archive', TaskConstants.ARCHIVE_QUEUE_NAME)
            directory_to_archive = processor.get_extract_work_zone_path(tenant, request_id)
            celery_timeout = int(get_current_registry().settings.get('extract.celery_timeout', '30'))
            # Synchronous calls to generate json and csv and then to archive
            # BUG, it still routes to 'extract' queue due to chain
            # result = chain(prepare_path.subtask(args=[tenant, request_id, [directory_to_archive]], queue=queue, immutable=True),  # @UndefinedVariable
            #                route_tasks(tenant, request_id, tasks, queue_name=queue),
            #                archive.subtask(args=[request_id, directory_to_archive], queue=archive_queue, immutable=True)).delay()
            prepare_path.apply_async(args=[request_id, [directory_to_archive]], queue=queue, immutable=True).get(timeout=celery_timeout)  # @UndefinedVariable
            generate_extract_file_tasks(tenant, request_id, tasks, queue_name=queue)().get(timeout=celery_timeout)
            content = archive_with_stream.apply_async(args=[request_id, directory_to_archive], queue=archive_queue, immutable=True).get(timeout=celery_timeout)
            clean_up.apply_async(args=[get_extract_request_base_path(tenant, request_id)], queue=queue)  # @UndefinedVariable
            return content
        else:
            raise NotFoundException("There are no results")
# Extract request parameter schema, merged with the shared filter config.
# Fix: patterns containing regex escapes use raw strings, so "\d" is a regex
# escape and not a deprecated/invalid Python string escape.
merge_dict(
    {
        Constants.STATECODE: {
            "type": "string",
            "pattern": "^[a-zA-Z]{2}$",
            "required": True
        },
        Constants.ASMTYEAR: {
            "type": "string",
            "pattern": r"^\d{4}$",
            "required": True
        },
        Constants.ASMTTYPE: {
            "type": "string",
            "pattern": "^(" + AssessmentType.SUMMATIVE + "|" + AssessmentType.INTERIM_COMPREHENSIVE + ")$",
            "required": True
        },
        Constants.ASMTSUBJECT: {
            "type": "string",
            "pattern": "^(" + Constants.MATH + "|" + Constants.ELA + ")$",
            "required": True
        },
        Constants.ASMTGRADE: {
            "type": "string",
            "pattern": "^[K0-9]+$",
            "maxLength": 2,
            "required": True,
        }
    },
    FILTERS_CONFIG)
# Report parameter validation schema, merged with the shared filter config.
# Fix: raw strings keep "\-" a regex escape instead of a deprecated/invalid
# Python string escape.
REPORT_PARAMS = merge_dict(
    {
        Constants.STATECODE: {
            "type": "string",
            "required": True,
            "pattern": r"^[a-zA-Z0-9\-]{0,50}$",
        },
        Constants.DISTRICTGUID: {
            "type": "string",
            "required": True,
            "pattern": r"^[a-zA-Z0-9\-]{0,50}$",
        },
        Constants.SCHOOLGUID: {
            "type": "string",
            "required": True,
            "pattern": r"^[a-zA-Z0-9\-]{0,50}$",
        },
        Constants.ASMTGRADE: {
            "type": "string",
            "maxLength": 2,
            "required": False,
            "pattern": "^[K0-9]+$",
        },
        Constants.ASMTSUBJECT: {
            "type": "array",
            "required": False,
            "items": {
                "type": "string",
                "pattern": "^(" + Constants.ELA + "|" + Constants.MATH + ")$",
            }
        },
        Constants.ASMTYEAR: {
            "type": "integer",
            "required": False,
            "pattern": "^[1-9][0-9]{3}$"
        },
        Constants.ASMTTYPE: {
            "enum": [
                AssessmentType.INTERIM_ASSESSMENT_BLOCKS,
                AssessmentType.INTERIM_COMPREHENSIVE,
                AssessmentType.SUMMATIVE
            ],
            "required": False
        }
    }, FILTERS_CONFIG)
"properties": merge_dict({ Extract.EXTRACTTYPE: { "type": "array", "items": { "type": "string", "pattern": "^" + ExtractType.studentAssessment + "$" }, "minItems": 1, "uniqueItems": True, "required": False }, Constants.ASMTTYPE: { "type": "array", "items": { "type": "string", "pattern": "^(" + AssessmentType.SUMMATIVE + "|" + AssessmentType.INTERIM_COMPREHENSIVE + "|" + AssessmentType.INTERIM_ASSESSMENT_BLOCKS + ")$" }, "minItems": 1, "uniqueItems": True, "required": True }, Constants.ASMTSUBJECT: { "type": "array", "items": { "type": "string", "pattern": "^(" + Constants.MATH + "|" + Constants.ELA + ")$" }, "minItems": 1, "uniqueItems": True, "required": True }, Constants.ASMTYEAR: { "type": "array", "items": { "type": "string", "pattern": "^\d{4}$" }, "minItems": 1, "uniqueItems": True, "required": False }, Constants.STATECODE: { "type": "array", "items": { "type": "string", "pattern": "^[a-zA-Z]{2}$" }, "minItems": 1, "uniqueItems": True, "required": True, }, Constants.DISTRICTGUID: { "type": "array", "items": { "type": "string", "pattern": "^[a-zA-Z0-9\-]{0,50}$", }, "minItems": 1, "uniqueItems": True, "required": False }, Constants.SCHOOLGUID: { "type": "array", "items": { "type": "string", "pattern": "^[a-zA-Z0-9\-]{0,50}$", }, "minItems": 1, "uniqueItems": True, "required": False }, Constants.ASMTGRADE: { "type": "array", "items": { "type": "string", "pattern": "^[a-zA-Z0-9\-]{0,50}$", }, "minItems": 1, "uniqueItems": True, "required": False }, Constants.STUDENTGUID: { "type": "array", "items": { "type": "string", "pattern": "^[a-zA-Z0-9\-]{0,50}$" }, "minItems": 1, "uniqueItems": True, "required": False }, Extract.SYNC: { "type": "string", "required": False, "pattern": "^(true|TRUE)$", }, Extract.ASYNC: { "type": "string", "required": False, "pattern": "^(true|TRUE)$", }, Constants.SL: { # this is added by GET request inside browsers "type": "string", "pattern": "^\d+$", "required": False } }, FILTERS_CONFIG)
# Individual-student report parameter schema, merged with the shared filter
# config.  Fixes: raw strings for "\d" / "\-" regex escapes, and the
# "minitems" typo on ASMTGRADE corrected to "minItems" (the capitalized form
# every other array entry uses; the lowercase key would be ignored by a
# JSON-Schema validator).
merge_dict(
    {
        Constants.STATECODE: {
            "type": "string",
            "required": True,
            "pattern": "^[a-zA-Z]{2}$"
        },
        Constants.STUDENTGUID: {
            "type": "array",
            "items": {
                "type": "string",
                "pattern": r"^[a-zA-Z0-9\-]{0,50}$"
            },
            "minItems": 1,
            "uniqueItems": True,
            "required": False
        },
        Constants.DISTRICTGUID: {
            "type": "string",
            "required": False,
            "pattern": r"^[a-zA-Z0-9\-]{0,50}$",
        },
        Constants.SCHOOLGUID: {
            "type": "string",
            "required": False,
            "pattern": r"^[a-zA-Z0-9\-]{0,50}$",
        },
        Constants.ASMTGRADE: {
            "type": "array",
            "items": {
                "type": "string",
                "pattern": "^[0-9]{1,2}$"
            },
            "minItems": 1,
            "uniqueItems": True,
            "required": False
        },
        Constants.ASMTTYPE: {
            "type": "string",
            "required": False,
            "pattern": "^(" + AssessmentType.INTERIM_ASSESSMENT_BLOCKS + "|" + AssessmentType.SUMMATIVE + "|" + AssessmentType.INTERIM_COMPREHENSIVE + ")$",
        },
        Constants.ASMTYEAR: {
            "type": "integer",
            "required": True,
            "pattern": "^2[0-9]{3}$"
        },
        Constants.DATETAKEN: {
            "type": "integer",
            "required": False,
            "pattern": "^[0-9]{8}$"
        },
        Constants.MODE: {
            "type": "string",
            "required": False,
            "pattern": "^(gray|GRAY|color|COLOR)$",
        },
        Constants.LANG: {
            "type": "string",
            "required": False,
            "pattern": "^[a-z]{2}$",
        },
        Constants.PDF: {
            "type": "string",
            "required": False,
            "pattern": "^(true|false|TRUE|FALSE)$",
        },
        Constants.SL: {
            "type": "string",
            "required": False,
            "pattern": r"^\d+$",
        }
    }, FILTERS_CONFIG)
def test_merge_dict(self):
    """merge_dict combines two dicts; keys from the first argument win."""
    cases = [
        (({}, {}), {}),
        (({'a': 'b'}, {'c': 'd'}), {'a': 'b', 'c': 'd'}),
        (({'a': 'b'}, {'a': 'd'}), {'a': 'b'}),
    ]
    for (first, second), expected in cases:
        self.assertDictEqual(merge_dict(first, second), expected)
"properties": merge_dict({ Constants.STATECODE: { "type": "string", "required": True, "pattern": "^[a-zA-Z]{2}$" }, Constants.STUDENTGUID: { "type": "array", "items": { "type": "string", "pattern": "^[a-zA-Z0-9\-]{0,50}$" }, "minItems": 1, "uniqueItems": True, "required": False }, Constants.DISTRICTGUID: { "type": "string", "required": False, "pattern": "^[a-zA-Z0-9\-]{0,50}$", }, Constants.SCHOOLGUID: { "type": "string", "required": False, "pattern": "^[a-zA-Z0-9\-]{0,50}$", }, Constants.ASMTGRADE: { "type": "array", "items": { "type": "string", "pattern": "^[0-9]{1,2}$" }, "minitems": 1, "uniqueItems": True, "required": False }, Constants.ASMTTYPE: { "type": "string", "required": False, "pattern": "^(" + AssessmentType.INTERIM_ASSESSMENT_BLOCKS + "|" + AssessmentType.SUMMATIVE + "|" + AssessmentType.INTERIM_COMPREHENSIVE + ")$", }, Constants.ASMTYEAR: { "type": "integer", "required": True, "pattern": "^2[0-9]{3}$" }, Constants.DATETAKEN: { "type": "integer", "required": False, "pattern": "^[0-9]{8}$" }, Constants.MODE: { "type": "string", "required": False, "pattern": "^(gray|GRAY|color|COLOR)$", }, Constants.LANG: { "type": "string", "required": False, "pattern": "^[a-z]{2}$", }, Constants.PDF: { "type": "string", "required": False, "pattern": "^(true|false|TRUE|FALSE)$", }, Constants.SL: { "type": "string", "required": False, "pattern": "^\d+$", } }, FILTERS_CONFIG)
# Item-level extract request schema, merged with the shared filter config.
# Fix: raw strings for patterns containing "\d" / "\-" so the regex escapes
# are not deprecated/invalid Python string escapes.
merge_dict(
    {
        Constants.STATECODE: {
            "type": "string",
            "pattern": "^[a-zA-Z]{2}$",
            "required": True
        },
        Constants.ASMTYEAR: {
            "type": "string",
            "pattern": r"^\d{4}$",
            "required": True
        },
        Constants.ASMTTYPE: {
            "type": "string",
            "pattern": "^(" + AssessmentType.SUMMATIVE + "|" + AssessmentType.INTERIM_COMPREHENSIVE + ")$",
            "required": True
        },
        Constants.ASMTSUBJECT: {
            "type": "string",
            "pattern": "^(" + Constants.MATH + "|" + Constants.ELA + ")$",
            "required": True
        },
        Constants.ASMTGRADE: {
            "type": "string",
            "pattern": "^[K0-9]+$",
            "maxLength": 2,
            "required": True,
        },
        Constants.ITEMID: {
            "type": "array",
            "items": {
                "type": "string",
                "pattern": r"^[a-zA-Z0-9\-]*$"
            },
            "minItems": 1,
            "uniqueItems": True,
            "required": False,
        }
    },
    FILTERS_CONFIG)
from smarter.utils.encryption import decode import pyramid.threadlocal EDWARE_PUBLIC_SECRET = 'edware.public.secret' REPORT_PARAMS = merge_dict( { Constants.SID: { "type": "string", "required": True, "pattern": "^[a-zA-Z0-9\-\=\_]{20,52}$" }, Constants.DISTRICTGUID: { "type": "string", "required": False, "pattern": "^[a-zA-Z0-9\-]{0,40}$" }, Constants.SCHOOLGUID: { "type": "string", "required": False, "pattern": "^[a-zA-Z0-9\-]{0,40}$" }, Constants.ASMTYEAR: { "type": "integer", "required": False, "pattern": "^[1-9][0-9]{3}$" } }, FILTERS_CONFIG) SHORT_URL_REPORT_PARAMS = { Constants.SID: { "type": "string",
# Report parameter validation schema, merged with the shared filter config.
# Fix: raw strings keep "\-" a regex escape instead of a deprecated/invalid
# Python string escape.
REPORT_PARAMS = merge_dict({
    Constants.STATECODE: {
        "type": "string",
        "required": True,
        "pattern": r"^[a-zA-Z0-9\-]{0,50}$",
    },
    Constants.DISTRICTGUID: {
        "type": "string",
        "required": True,
        "pattern": r"^[a-zA-Z0-9\-]{0,50}$",
    },
    Constants.SCHOOLGUID: {
        "type": "string",
        "required": True,
        "pattern": r"^[a-zA-Z0-9\-]{0,50}$",
    },
    Constants.ASMTGRADE: {
        "type": "string",
        "maxLength": 2,
        "required": False,
        "pattern": "^[K0-9]+$",
    },
    Constants.ASMTSUBJECT: {
        "type": "array",
        "required": False,
        "items": {
            "type": "string",
            "pattern": "^(" + Constants.ELA + "|" + Constants.MATH + ")$",
        }
    },
    Constants.ASMTYEAR: {
        "type": "integer",
        "required": False,
        "pattern": "^[1-9][0-9]{3}$"
    },
    Constants.ASMTTYPE: {
        "enum": [AssessmentType.INTERIM_ASSESSMENT_BLOCKS, AssessmentType.INTERIM_COMPREHENSIVE, AssessmentType.SUMMATIVE],
        "required": False
    }
}, FILTERS_CONFIG)
"properties": merge_dict({ Constants.STATECODE: { "type": "string", "pattern": "^[a-zA-Z]{2}$", "required": True }, Constants.ASMTYEAR: { "type": "string", "pattern": "^\d{4}$", "required": True }, Constants.ASMTTYPE: { "type": "string", "pattern": "^(" + AssessmentType.SUMMATIVE + "|" + AssessmentType.INTERIM_COMPREHENSIVE + ")$", "required": True }, Constants.ASMTSUBJECT: { "type": "string", "pattern": "^(" + Constants.MATH + "|" + Constants.ELA + ")$", "required": True }, Constants.ASMTGRADE: { "type": "string", "pattern": "^[K0-9]+$", "maxLength": 2, "required": True, }, Constants.ITEMID: { "type": "array", "items": { "type": "string", "pattern": "^[a-zA-Z0-9\-]*$" }, "minItems": 1, "uniqueItems": True, "required": False, } }, FILTERS_CONFIG)
def process_extraction_request(params, is_async=True):
    '''
    Build and dispatch extract tasks for every combination of
    district/school/grade/subject/assessment-type in *params*.

    :param dict params: contains query parameter.  Value for each pair is expected to be a list
    :param bool is_async: True queues the extract asynchronously and returns
        task/file info; False runs the prepare/generate/archive pipeline
        synchronously and returns the archived content
    :raises NotFoundException: synchronous mode only, when no tasks were built
    '''
    tasks = []
    response = {}
    task_responses = []
    filter_params = {}
    state_code = params[Constants.STATECODE][0]
    # Optional dimensions default to [None] so each nested loop runs once.
    districts = params.get(Constants.DISTRICTGUID, [None])
    schools = params.get(Constants.SCHOOLGUID, [None])
    grades = params.get(Constants.ASMTGRADE, [None])
    request_id, user, tenant = processor.get_extract_request_user_info(
        state_code)
    # This is purely for file name conventions (for async extracts), consider refactoring
    is_tenant_level = is_async
    # Get filter related parameters
    if has_filters(params):
        filter_params = {
            k: v
            for k, v in params.items() if k in FILTERS_CONFIG
        }
    # One task set per combination of the request dimensions.
    for district in districts:
        for school in schools:
            for grade in grades:
                for s in params[Constants.ASMTSUBJECT]:
                    for t in params[Constants.ASMTTYPE]:
                        param = merge_dict(
                            {
                                Constants.ASMTSUBJECT: s,
                                Constants.ASMTTYPE: t,
                                Constants.ASMTYEAR:
                                params[Constants.ASMTYEAR][0],
                                Constants.STATECODE: state_code,
                                Constants.SCHOOLGUID: school,
                                Constants.DISTRICTGUID: district,
                                Constants.ASMTGRADE: grade,
                                Constants.STUDENTGUID:
                                params.get(Constants.STUDENTGUID)
                            }, filter_params)
                        # Echo of the request dimensions returned to the caller.
                        task_response = {
                            Constants.STATECODE: param[Constants.STATECODE],
                            Constants.DISTRICTGUID: district,
                            Constants.SCHOOLGUID: school,
                            Extract.EXTRACTTYPE:
                            ExtractType.studentAssessment,
                            Constants.ASMTSUBJECT:
                            param[Constants.ASMTSUBJECT],
                            Constants.ASMTTYPE: param[Constants.ASMTTYPE],
                            Constants.ASMTYEAR: param[Constants.ASMTYEAR],
                            Extract.REQUESTID: request_id
                        }
                        # separate by grades if no grade is specified
                        __tasks, __task_responses = _create_tasks_with_responses(
                            request_id,
                            user,
                            tenant,
                            param,
                            task_response,
                            is_tenant_level=is_tenant_level)
                        tasks += __tasks
                        task_responses += __task_responses
    if is_async:
        response['tasks'] = task_responses
        if tasks:
            response[Constants.FILES] = []
            files = {}
            archive_file_name = processor.get_archive_file_path(
                user.get_uid(), tenant, request_id)
            files[Constants.FILENAME] = os.path.basename(archive_file_name)
            directory_to_archive = processor.get_extract_work_zone_path(
                tenant, request_id)
            # Register extract file with HPZ.
            registration_id, download_url, web_download_url = register_file(
                user.get_uid(), user.get_email())
            files[Constants.DOWNLOAD_URL] = download_url
            files[Constants.WEB_DOWNLOAD_URL] = web_download_url
            response[Constants.FILES].append(files)
            queue = get_current_registry().settings.get(
                'extract.job.queue.async', TaskConstants.DEFAULT_QUEUE_NAME)
            start_extract(tenant,
                          request_id, [archive_file_name],
                          [directory_to_archive], [registration_id],
                          tasks,
                          queue=queue)
        return response
    else:
        if tasks:
            settings = get_current_registry().settings
            queue = settings.get('extract.job.queue.sync',
                                 TaskConstants.SYNC_QUEUE_NAME)
            archive_queue = settings.get('extract.job.queue.archive',
                                         TaskConstants.ARCHIVE_QUEUE_NAME)
            directory_to_archive = processor.get_extract_work_zone_path(
                tenant, request_id)
            celery_timeout = int(get_current_registry().settings.get(
                'extract.celery_timeout', '30'))
            # Synchronous calls to generate json and csv and then to archive
            # BUG, it still routes to 'extract' queue due to chain
            # result = chain(prepare_path.subtask(args=[tenant, request_id, [directory_to_archive]], queue=queue, immutable=True),    # @UndefinedVariable
            #                route_tasks(tenant, request_id, tasks, queue_name=queue),
            #                archive.subtask(args=[request_id, directory_to_archive], queue=archive_queue, immutable=True)).delay()
            prepare_path.apply_async(
                args=[request_id, [directory_to_archive]],
                queue=queue,
                immutable=True).get(
                    timeout=celery_timeout)  # @UndefinedVariable
            generate_extract_file_tasks(
                tenant, request_id, tasks,
                queue_name=queue)().get(timeout=celery_timeout)
            content = archive_with_stream.apply_async(
                args=[request_id, directory_to_archive],
                queue=archive_queue,
                immutable=True).get(timeout=celery_timeout)
            clean_up.apply_async(
                args=[get_extract_request_base_path(tenant, request_id)],
                queue=queue)  # @UndefinedVariable
            return content
        else:
            raise NotFoundException("There are no results")