def test_invalid_territory_warns(self):
    """Checks that an unrecognized territory produces exactly one warning."""
    validator = file_name_validators.FileNameValidator(
        constants.FILE_NAME_COMPONENTS)
    unused_file_name_dict, warnings = validator.validate_value(
        self._get_filename(territory_of_use_or_sale='potato'))
    # assertEqual rather than the assertEquals alias: the alias is deprecated
    # and no longer exists in unittest as of Python 3.12.
    self.assertEqual(len(warnings), 1)
    expected_warning = (
        'It is recommended that the TerritoryOfUseOrSale be set to a CISAC TIS '
        'code or a two-letter ISO code (use "multi" or "worldwide" for '
        'multiple territories). Provided value: "potato"')
    # The warnings container is materialized as a list so that its single
    # element can be picked by index.
    self.assertIn(expected_warning, str(list(warnings)[0]))
def test_mismatch_file_without_fail_fast(self):
    """Checks that mismatching report file names fail the validation.

    The second file name differs from the other three in its second
    component ('PADPIDA2' instead of 'PADPIDA2014999999Z'), so validating
    the list is expected to raise a ReportValidationFailure that reports
    one fatal error.
    """
    # Replaces the validator built in setUp with one whose logger gets False
    # as its last argument (presumably the fail-fast flag, going by the test
    # name -- confirm against DSRFLogger).
    self.validator = report_files_validators.ReportFilesValidator(
        file_name_validators.FileNameValidator(constants.FILE_NAME_COMPONENTS),
        dsrf_logger.DSRFLogger(__name__, '/tmp/example.log', False))
    files_list = [
        'DSR_PADPIDA2014999999Z_PADPIDA2014111801Y_AdSupport_2015-02_AU_1of4_'
        '20150723T092522.tsv.gz',
        'DSR_PADPIDA2_PADPIDA2014111801Y_AdSupport_2015-02_AU_2of4_'
        '20150723T092522.tsv.gz',
        'DSR_PADPIDA2014999999Z_PADPIDA2014111801Y_AdSupport_2015-02_AU_3of4_'
        '20150723T092522.tsv.gz',
        'DSR_PADPIDA2014999999Z_PADPIDA2014111801Y_AdSupport_2015-02_AU_4of4_'
        '20150723T092522.tsv.gz']
    # assertRaisesRegexp is a deprecated alias that no longer exists as of
    # Python 3.12, while assertRaisesRegex is missing on Python 2.7; use
    # whichever one this unittest version provides.
    assert_raises_regex = (
        getattr(self, 'assertRaisesRegex', None) or self.assertRaisesRegexp)
    assert_raises_regex(
        error.ReportValidationFailure,
        'Found 1 fatal error\\(s\\) and 0 warnings, please check log file at '
        '"/tmp/example.log" for details.\nFirst error: File ',  # truncated
        self.validator.validate_file_names, files_list)
def test_file_name_validator(self):
    """Checks that a valid file name is split into its components.

    The name produced by self._get_filename() is expected to validate
    without warnings and to yield the component dictionary below.
    """
    validator = file_name_validators.FileNameValidator(
        constants.FILE_NAME_COMPONENTS)
    expected_file_name_dict = {
        'DSR': 'DSR',
        'MessageRecipient': 'PADPIDA2014999999Z',
        'MessageSender': 'PADPIDA2014111801Y',
        'ServiceDescription': 'AdSupport',
        'MessageNotificationPeriod': '2015-02',
        'TerritoryOfUseOrSale': 'AU',
        'x': '3',
        'y': '4',
        'MessageCreatedDateTime': '20150723T092522',
        'ext': 'tsv',
    }
    actual_file_name_dict, warnings = validator.validate_value(
        self._get_filename())
    # assertEqual rather than the assertEquals alias: the alias is deprecated
    # and no longer exists in unittest as of Python 3.12.
    self.assertEqual(set(), warnings)
    self.assertEqual(actual_file_name_dict, expected_file_name_dict)
def setUp(self):
    """Builds the report files validator used by the test methods."""
    name_validator = file_name_validators.FileNameValidator(
        constants.FILE_NAME_COMPONENTS)
    logger = dsrf_logger.DSRFLogger(__name__, '/tmp/example.log', True)
    self.validator = report_files_validators.ReportFilesValidator(
        name_validator, logger)
def test_multi_territory(self):
    """Checks that a file name with a 'multi' territory validates cleanly."""
    file_name = (
        'DSR_PADPIDA2014999999Z_PADPIDA2014111801Y_'
        'AdSupport_2015-02_multi_3of4_20150723T092522.tsv')
    name_validator = file_name_validators.FileNameValidator(
        constants.FILE_NAME_COMPONENTS)
    # Nothing is asserted on the result: the test passes as long as the
    # validation does not raise.
    name_validator.validate_value(file_name)
def test_invalid_format(self):
    """Checks that a file name in an unexpected format is rejected."""
    validator = file_name_validators.FileNameValidator(
        constants.FILE_NAME_COMPONENTS)
    # assertRaisesRegexp is a deprecated alias that no longer exists as of
    # Python 3.12, while assertRaisesRegex is missing on Python 2.7; use
    # whichever one this unittest version provides.
    assert_raises_regex = (
        getattr(self, 'assertRaisesRegex', None) or self.assertRaisesRegexp)
    assert_raises_regex(
        error.FileNameValidationFailure, 'File 1.csv has invalid filename',
        validator.validate_value, '1.csv')
def parse_report(self, files_list, dsrf_xsd_file, avs_xsd_file,
                 human_readable=False, write_head=True):
    """Parses a dsrf report to block objects.

    The report file names are validated first. Each file is then parsed and
    its blocks are transferred to the queue: body blocks always, the head
    block only if write_head is set, and any other block (the footer) never.

    Args:
      files_list: A list of files in the report to parse.
      dsrf_xsd_file: User-provided path to a custom XSD, forwarded to the
        file parser.
      avs_xsd_file: User-provided path to a custom AVS XSD, forwarded to the
        file parser.
      human_readable: If True, write the block to the queue in a human
        readable form. Otherwise, write the block as a raw bytes.
      write_head: If set to False, the header will not be written to the
        queue.

    Returns:
      dsrf_logger.DSRFLogger object.

    Raises:
      error.ReportValidationFailure: If a body block number appears more
        than once in the report.
    """
    file_path_to_name_map = {
        file_path: path.basename(file_path) for file_path in files_list}
    expected_components = constants.FILE_NAME_COMPONENTS
    self.logger.info('Validating the report file names.')
    report_validator = report_files_validators.ReportFilesValidator(
        file_name_validators.FileNameValidator(expected_components),
        self.logger)
    report_validator.validate_file_names(
        list(file_path_to_name_map.values()))

    # Maps every body block number seen so far to the number of the file it
    # was found in, so a repeated block number is detected with one lookup.
    block_to_file_number = {}
    for file_path, file_name in six.iteritems(file_path_to_name_map):
        file_parser = dsrf_file_parser.DSRFFileParser(
            self.logger, dsrf_xsd_file, avs_xsd_file, file_path)
        file_name_dict = (
            file_name_validators.FileNameValidator.split_file_name(
                file_name, expected_components))
        file_number = file_name_dict['x']
        self.logger.info('Start parsing file number %s.', file_number)
        for block in file_parser.parse_file(int(file_number)):
            if block.type == block_pb2.BODY:
                if block.number in block_to_file_number:
                    # The file numbers are taken from the file names as-is;
                    # key=int orders them numerically, so that '2' is
                    # reported before '10' even when they are strings.
                    lower, higher = sorted(
                        (block_to_file_number[block.number], file_number),
                        key=int)
                    raise error.ReportValidationFailure(
                        'The block number %s is not unique. It appears in '
                        'files number: %s and %s.'
                        % (block.number, lower, higher))
                block_to_file_number[block.number] = file_number
            elif block.type == block_pb2.HEAD:
                try:
                    self.validate_head_block(
                        block, file_name, file_name_dict)
                except error.FileNameValidationFailure as e:
                    # A head block that fails validation is logged rather
                    # than aborting the parsing.
                    self.logger.error(e)
                if not write_head:
                    # Skip writing the header to the queue, if requested.
                    continue
            else:  # FOOT
                continue
            self.write_to_queue(block, self.logger, human_readable)

    try:
        self.logger.raise_if_fatal_errors_found()
    except error.ReportValidationFailure as e:
        # The failure is reported on stderr and the logger is still
        # returned to the caller.
        sys.stderr.write(
            constants.COLOR_RED + constants.BOLD + '\n[Cell validation] ' +
            str(e) + constants.ENDC)
    return self.logger