def test_create_csv(self):
    # Writes a raw XML report to disk, calls remote_csv_generator directly
    # on it, and checks that the resulting item-level CSV holds exactly one
    # row with the expected column values.
    xml_root = os.path.join(self.__tempfolder.name, str(uuid.uuid4()), str(uuid.uuid4()))
    csv_root = os.path.join(self.__tempfolder.name, str(uuid.uuid4()), str(uuid.uuid4()))
    working_dir = os.path.join(self.__tempfolder.name, 'work_dir')
    xml_string = (
        '<TDSReport> '
        '<Test testId="SBAC-FT-SomeDescription-ELA-7" subject="MATH" grade="3" '
        'assessmentType="Summative" academicYear="2014" /> '
        '<Examinee key="12"> '
        '<ExamineeRelationship context="INITIAL" name="StateAbbreviation" '
        'entityKey="3" value="CA" contextDate="2014-04-14T11:13:41.803"/> '
        '</Examinee> '
        '<Opportunity> '
        '<Item position="position_value" segmentId="segmentId_value" '
        'bankKey="test" key="key_value" operational="operational_value" '
        'isSelected="isSelected_value" format="format_type_value" '
        'score="score_value" scoreStatus="scoreStatus_value" '
        'adminDate="adminDate_value" numberVisits="numberVisits_value" '
        'mimeType="test" strand="strand_value" '
        'contentLevel="contentLevel_value" pageNumber="pageNumber_value" '
        'pageVisits="pageVisits_value" pageTime="pageTime_value" '
        'dropped="dropped_value"> '
        '</Item> '
        '</Opportunity> '
        '</TDSReport>'
    )
    meta_names = meta.Meta(True, '12', 'test2', 'test3', 'test4', 'test5',
                           'test6', 'test7', 'test8', 'test9')

    # Stage the XML input file.
    xml_file_path = create_path(xml_root, meta_names, generate_path_to_raw_xml)
    file_writer(xml_file_path, xml_string)

    # Produce the CSV and read every row back.
    csv_file_path = create_path(csv_root, meta_names, generate_path_to_item_csv)
    remote_csv_generator(meta_names, csv_file_path, xml_file_path, working_dir,
                         metadata_queue='test')
    with open(csv_file_path, newline='') as csv_file:
        rows = list(csv.reader(csv_file, delimiter=','))

    csv_first_row_list = [
        'key_value', '12', 'segmentId_value', 'position_value', '',
        'operational_value', 'isSelected_value', 'format_type_value',
        'score_value', 'scoreStatus_value', 'adminDate_value',
        'numberVisits_value', 'strand_value', 'contentLevel_value',
        'pageNumber_value', 'pageVisits_value', 'pageTime_value',
        'dropped_value',
    ]
    self.assertEqual(1, len(rows))
    self.assertEqual(csv_first_row_list, rows[0])
def _is_within_directory(root_dir, file_path):
    '''
    tell whether file_path is located inside root_dir

    Both paths are normalized with os.path.abspath and then compared path
    component by path component (os.path.commonpath). A plain string prefix
    comparison would accept a sibling directory that merely shares a name
    prefix with root_dir (e.g. /data/xml_other against /data/xml) and would
    not notice ".." segments.

    :param root_dir: directory the file has to stay in
    :param file_path: path to validate
    :returns: True when file_path is inside root_dir
    '''
    root_dir = os.path.abspath(root_dir)
    try:
        return os.path.commonpath([root_dir, os.path.abspath(file_path)]) == root_dir
    except ValueError:
        # commonpath rejects paths that have no common anchor (e.g. different drives)
        return False


def remote_write(xml_data):
    '''
    save data in given path and queue the follow-up tasks

    The XML is written under the configured xml base directory. When the
    write succeeds, one task is queued to generate metadata for the xml file
    and one to generate the item level/assessment csv.

    A TSBException raised along the way is logged and not re-raised.

    :param xml_data: xml content to store
    :returns: True when file is written
    '''
    written = False
    try:
        meta_names = extract_meta_names(xml_data)
        root_dir_csv = conf.get("smarter_score_batcher.base_dir.csv")
        if root_dir_csv is not None:
            root_dir_csv = os.path.abspath(root_dir_csv)
        root_dir_xml = conf.get("smarter_score_batcher.base_dir.xml")
        if root_dir_xml is not None:
            root_dir_xml = os.path.abspath(root_dir_xml)
        timestamp = time.strftime('%Y%m%d%H%M%S', time.gmtime())
        xml_file_path = create_path(root_dir_xml, meta_names, generate_file_path,
                                    extension=timestamp + '.xml')
        # refuse to write outside of the configured xml base directory
        if not _is_within_directory(root_dir_xml, xml_file_path):
            raise TSBSecurityException(msg='Fail to create filepath name requested dir[' + xml_file_path + ']',
                                       err_code=ErrorCode.PATH_TRAVERSAL_DETECTED,
                                       err_source=ErrorSource.REMOTE_WRITE)
        written = file_writer(xml_file_path, xml_data)
        if written:
            work_dir = conf.get("smarter_score_batcher.base_dir.working")
            if work_dir is not None:
                work_dir = os.path.abspath(work_dir)
            queue_name = conf.get('smarter_score_batcher.async_queue')
            csv_file_path = create_path(root_dir_csv, meta_names, generate_path_to_item_csv)
            # same containment rule for the csv output
            if not _is_within_directory(root_dir_csv, csv_file_path):
                raise TSBSecurityException(msg='Fail to create filepath name requested dir[' + csv_file_path + ']',
                                           err_code=ErrorCode.PATH_TRAVERSAL_DETECTED,
                                           err_source=ErrorSource.REMOTE_WRITE)
            metadata_queue = conf.get('smarter_score_batcher.metadata_queue')
            # Fire two celery tasks - one to generate metadata for xml, and one to generate item level/assessment csv
            metadata_generator_task.apply_async(args=(xml_file_path,), queue=metadata_queue)    # @UndefinedVariable
            remote_csv_generator.apply_async(args=(meta_names, csv_file_path, xml_file_path, work_dir, metadata_queue), queue=queue_name)  # @UndefinedVariable
    except TSBException as e:
        # ignore exception for error handling because this function is synchronous call
        logging.error(str(e))
    return written
def test_generate_csv_from_xml(self):
    # Writes a raw XML report to disk, runs generate_csv_from_xml on it and
    # checks that the item-level CSV holds exactly one row with the expected
    # column values.
    xml_root = os.path.join(self.__tempfolder.name, str(uuid.uuid4()), str(uuid.uuid4()))
    csv_root = os.path.join(self.__tempfolder.name, str(uuid.uuid4()), str(uuid.uuid4()))
    working_dir = os.path.join(self.__tempfolder.name, "work")
    xml_string = (
        '<TDSReport> '
        '<Test subject="MATH" testId="SBAC-FT-SomeDescription-ELA-7" grade="3" '
        'assessmentType="Summative" academicYear="2014" /> '
        '<Examinee key="12"> '
        '<ExamineeRelationship context="INITIAL" name="StateAbbreviation" '
        'entityKey="3" value="CA" contextDate="2014-04-14T11:13:41.803"/> '
        '</Examinee> '
        '<Opportunity> '
        '<Item position="position_value" segmentId="segmentId_value" '
        'bankKey="test" key="key_value" operational="operational_value" '
        'isSelected="isSelected_value" format="format_type_value" '
        'score="score_value" scoreStatus="scoreStatus_value" '
        'adminDate="adminDate_value" numberVisits="numberVisits_value" '
        'mimeType="test" strand="strand_value" '
        'contentLevel="contentLevel_value" pageNumber="pageNumber_value" '
        'pageVisits="pageVisits_value" pageTime="pageTime_value" '
        'dropped="dropped_value"> '
        '</Item> '
        '</Opportunity> '
        '</TDSReport>'
    )
    meta_names = Meta(True, 'test1', 'test2', 'test3', 'test4', 'test5',
                      'test6', 'test7', 'test8', 'test9')

    # Stage the XML input file.
    xml_file_path = create_path(xml_root, meta_names, generate_path_to_raw_xml)
    file_writer(xml_file_path, xml_string)

    # Produce the CSV and read every row back.
    csv_file_path = create_path(csv_root, meta_names, generate_path_to_item_csv)
    generate_csv_from_xml(meta_names, csv_file_path, xml_file_path, working_dir,
                          metadata_queue='test')
    with open(csv_file_path, newline='') as csv_file:
        rows = list(csv.reader(csv_file, delimiter=','))

    csv_first_row_list = [
        'key_value', 'test1', 'segmentId_value', 'position_value', '',
        'operational_value', 'isSelected_value', 'format_type_value',
        'score_value', 'scoreStatus_value', 'adminDate_value',
        'numberVisits_value', 'strand_value', 'contentLevel_value',
        'pageNumber_value', 'pageVisits_value', 'pageTime_value',
        'dropped_value',
    ]
    self.assertEqual(1, len(rows))
    self.assertEqual(csv_first_row_list, rows[0])
def test_generate_csv_from_xml_parse_error(self):
    # Feeds a file that is not XML to generate_csv_from_xml and expects a
    # TSBException.
    xml_root = os.path.join(self.__tempfolder.name, str(uuid.uuid4()), str(uuid.uuid4()))
    csv_root = os.path.join(self.__tempfolder.name, str(uuid.uuid4()), str(uuid.uuid4()))
    working_dir = os.path.join(self.__tempfolder.name, "work")
    meta_names = Meta(True, 'test1', 'test2', 'test3', 'test4', 'test5',
                      'test6', 'test7', 'test8', 'test9')

    # Stage the unparsable input file.
    xml_file_path = create_path(xml_root, meta_names, generate_path_to_raw_xml)
    file_writer(xml_file_path, "bad xml")

    csv_file_path = create_path(csv_root, meta_names, generate_path_to_item_csv)
    with self.assertRaises(TSBException):
        generate_csv_from_xml(meta_names, csv_file_path, xml_file_path, working_dir,
                              metadata_queue='test')
def _is_within_directory(root_dir, file_path):
    '''
    tell whether file_path is located inside root_dir

    Both paths are normalized with os.path.abspath and then compared path
    component by path component (os.path.commonpath). A plain string prefix
    comparison would accept a sibling directory that merely shares a name
    prefix with root_dir (e.g. /data/xml_other against /data/xml) and would
    not notice ".." segments.

    :param root_dir: directory the file has to stay in
    :param file_path: path to validate
    :returns: True when file_path is inside root_dir
    '''
    root_dir = os.path.abspath(root_dir)
    try:
        return os.path.commonpath([root_dir, os.path.abspath(file_path)]) == root_dir
    except ValueError:
        # commonpath rejects paths that have no common anchor (e.g. different drives)
        return False


def remote_write(xml_data):
    '''
    save data in given path and queue the follow-up tasks

    The XML is written under the configured xml base directory. When the
    write succeeds, one task is queued to generate metadata for the xml file
    and one to generate the item level/assessment csv.

    A TSBException raised along the way is logged and not re-raised.

    :param xml_data: xml content to store
    :returns: True when file is written
    '''
    written = False
    try:
        meta_names = extract_meta_names(xml_data)
        root_dir_csv = conf.get("smarter_score_batcher.base_dir.csv")
        if root_dir_csv is not None:
            root_dir_csv = os.path.abspath(root_dir_csv)
        root_dir_xml = conf.get("smarter_score_batcher.base_dir.xml")
        if root_dir_xml is not None:
            root_dir_xml = os.path.abspath(root_dir_xml)
        timestamp = time.strftime('%Y%m%d%H%M%S', time.gmtime())
        xml_file_path = create_path(root_dir_xml, meta_names, generate_file_path,
                                    extension=timestamp + '.xml')
        # refuse to write outside of the configured xml base directory
        if not _is_within_directory(root_dir_xml, xml_file_path):
            raise TSBSecurityException(
                msg='Fail to create filepath name requested dir[' + xml_file_path + ']',
                err_code=ErrorCode.PATH_TRAVERSAL_DETECTED,
                err_source=ErrorSource.REMOTE_WRITE)
        written = file_writer(xml_file_path, xml_data)
        if written:
            work_dir = conf.get("smarter_score_batcher.base_dir.working")
            if work_dir is not None:
                work_dir = os.path.abspath(work_dir)
            queue_name = conf.get('smarter_score_batcher.async_queue')
            csv_file_path = create_path(root_dir_csv, meta_names, generate_path_to_item_csv)
            # same containment rule for the csv output
            if not _is_within_directory(root_dir_csv, csv_file_path):
                raise TSBSecurityException(
                    msg='Fail to create filepath name requested dir[' + csv_file_path + ']',
                    err_code=ErrorCode.PATH_TRAVERSAL_DETECTED,
                    err_source=ErrorSource.REMOTE_WRITE)
            metadata_queue = conf.get('smarter_score_batcher.metadata_queue')
            # Fire two celery tasks - one to generate metadata for xml, and one to generate item level/assessment csv
            metadata_generator_task.apply_async(
                args=(xml_file_path, ), queue=metadata_queue)  # @UndefinedVariable
            remote_csv_generator.apply_async(
                args=(meta_names, csv_file_path, xml_file_path, work_dir, metadata_queue),
                queue=queue_name)  # @UndefinedVariable
    except TSBException as e:
        # ignore exception for error handling because this function is synchronous call
        logging.error(str(e))
    return written
def test_create_path_invalid(self):
    # Builds a path by hand and asserts that create_path, given these meta
    # values, does not return that same path.
    meta_names = Meta(True, 'NA', 'state_name', 'district_id', 'academic_year',
                      'asmt_type', 'subject', 'grade', 'effective_date', 'asmt_id')
    hand_built_path = os.path.join(
        self.__temp_dir.name,
        'student_id',
        'STATE_NAME',
        'district_id',
        'academic_year',
        'asmt_type',
        'subject',
        'grade',
        'effective_date',
    )
    generated_path = create_path(self.__temp_dir.name, meta_names, generate_path_to_raw_xml)
    self.assertNotEqual(hand_built_path, generated_path)
def test_generate_csv_from_xml_parse_exception_written(self, mock_process_item_level_data):
    # With the injected mock reporting success (return value True), feeds an
    # XML document whose <Examinee> element is never closed to
    # generate_csv_from_xml and expects a TSBException.
    mock_process_item_level_data.return_value = True
    xml_root = os.path.join(self.__tempfolder.name, str(uuid.uuid4()), str(uuid.uuid4()))
    csv_root = os.path.join(self.__tempfolder.name, str(uuid.uuid4()), str(uuid.uuid4()))
    working_dir = os.path.join(self.__tempfolder.name, "work")
    xml_string = (
        '<TDSReport> '
        '<Test subject="MATH" grade="3" assessmentType="Summative" '
        'academicYear="2014" /> '
        '<Examinee key="12"> '
        '<Opportunity> '
        '<Item position="position_value" segmentId="segmentId_value" '
        'bankKey="test" key="key_value" operational="operational_value" '
        'isSelected="isSelected_value" format="format_type_value" '
        'score="score_value" scoreStatus="scoreStatus_value" '
        'adminDate="adminDate_value" numberVisits="numberVisits_value" '
        'mimeType="test" strand="strand_value" '
        'contentLevel="contentLevel_value" pageNumber="pageNumber_value" '
        'pageVisits="pageVisits_value" pageTime="pageTime_value" '
        'dropped="dropped_value"> '
        '</Item> '
        '</Opportunity> '
        '</TDSReport>'
    )
    meta_names = Meta(True, 'test1', 'test2', 'test3', 'test4', 'test5',
                      'test6', 'test7', 'test8', 'test9')

    # Stage the malformed input file.
    xml_file_path = create_path(xml_root, meta_names, generate_path_to_raw_xml)
    file_writer(xml_file_path, xml_string)

    csv_file_path = create_path(csv_root, meta_names, generate_path_to_item_csv)
    with self.assertRaises(TSBException):
        generate_csv_from_xml(meta_names, csv_file_path, xml_file_path, working_dir,
                              metadata_queue='test')
def test_create_path_valid(self):
    # Asserts that create_path returns exactly the hand-built path below
    # for these meta values.
    meta_names = Meta(True, 'student_id', 'state_name', 'district_id', 'academic_year',
                      'asmt_type', 'subject', 'grade', 'effective_date', 'asmt_id')
    expected_path = os.path.join(
        self.__temp_dir.name,
        'STATE_NAME',
        'academic_year',
        'ASMT_TYPE',
        'effective_',
        'SUBJECT',
        'grade',
        'district_id',
        'student_id.xml',
    )
    generated_path = create_path(self.__temp_dir.name, meta_names, generate_path_to_raw_xml)
    self.assertEqual(expected_path, generated_path)