Esempio n. 1
0
 def test_parse_compressed_file(self):
   """Checks that the compressed test file parses to the expected blocks.

   Every block yielded by the parser for test_file_parser.tsv.gz is compared
   with the matching entry of self.expected_blocks, and the logger counters
   are checked after each block. The number of compared blocks is verified
   at the end, because zip() silently stops at the shorter input and would
   otherwise let the test pass when the parser yields too few (or no) blocks.
   """
   filename = path.join(
       path.dirname(__file__), '../testdata/test_file_parser.tsv.gz')
   parser = dsrf_file_parser.DSRFFileParser(self.logger, None, None, filename)
   parser.row_validators_list = self.row_validators_list
   # Materialize the expectations so their count can be checked afterwards.
   expected_blocks = list(self.expected_blocks)
   compared_blocks = 0
   for expected, actual in zip(expected_blocks, parser.parse_file(1)):
     self.assertMultiLineEqual(str(expected), str(actual))
     self.assertEqual(self.logger._counts['error'], 0)
     self.assertEqual(self.logger._counts['warn'], 0)
     self.assertGreater(self.logger._counts['info'], 0)
     compared_blocks += 1
   # Guard against a vacuous pass: every expected block must have been matched
   # against a block produced by the parser.
   self.assertEqual(
       compared_blocks, len(expected_blocks),
       'The parser yielded fewer blocks than expected.')
Esempio n. 2
0
    def parse_report(self,
                     files_list,
                     dsrf_xsd_file,
                     avs_xsd_file,
                     human_readable=False,
                     write_head=True):
        """Parses a dsrf report to block objects.

        The report file names are validated first. Each file is then parsed
        and its blocks are transferred to the queue: BODY blocks are always
        written, HEAD blocks are validated and written only when write_head
        is set, and any other block (FOOT) is skipped.

        A head block that fails validate_head_block is logged as an error
        instead of aborting the parse. Fatal errors collected by the logger
        are printed to stderr at the end rather than propagated.

        Args:
          files_list: A list of files in the report to parse.
          dsrf_xsd_file: Optional user-provided path to custom XSD.
          avs_xsd_file: Optional user-provided path to custom AVS XSD.
          human_readable: If True, write the block to the queue in a human
            readable form. Otherwise, write the block as a raw bytes.
          write_head: If set to False, the header will not be written to the
            queue.

        Returns:
          self.logger (a dsrf_logger.DSRFLogger object).

        Raises:
          error.ReportValidationFailure: If a body block number appears more
            than once in the report.
        """
        file_path_to_name_map = {
            file_path: path.basename(file_path)
            for file_path in files_list
        }

        expected_components = constants.FILE_NAME_COMPONENTS
        self.logger.info('Validating the report file names.')
        report_validator = report_files_validators.ReportFilesValidator(
            file_name_validators.FileNameValidator(expected_components),
            self.logger)
        report_validator.validate_file_names(
            list(file_path_to_name_map.values()))
        # Maps every body block number seen so far to the number of the file
        # it was read from, so the uniqueness check is a single lookup instead
        # of a scan over the block sets of all files.
        block_number_to_file = {}
        for file_path, file_name in six.iteritems(file_path_to_name_map):
            file_parser = dsrf_file_parser.DSRFFileParser(
                self.logger, dsrf_xsd_file, avs_xsd_file, file_path)
            file_name_dict = file_name_validators.FileNameValidator.split_file_name(
                file_name, expected_components)
            file_number = file_name_dict['x']
            self.logger.info('Start parsing file number %s.', file_number)
            for block in file_parser.parse_file(int(file_number)):
                if block.type == block_pb2.BODY:
                    if block.number in block_number_to_file:
                        # The file numbers are presumably strings taken from
                        # the file name (they are passed through int() above),
                        # so order them numerically: a plain min()/max() would
                        # put '10' before '9'.
                        first_file, second_file = sorted(
                            (block_number_to_file[block.number], file_number),
                            key=int)
                        raise error.ReportValidationFailure(
                            'The block number %s is not unique. It appears in files '
                            'number: %s and %s.' %
                            (block.number, first_file, second_file))
                    block_number_to_file[block.number] = file_number
                elif block.type == block_pb2.HEAD:
                    try:
                        self.validate_head_block(block, file_name,
                                                 file_name_dict)
                    except error.FileNameValidationFailure as e:
                        # A bad header is reported but does not stop parsing.
                        self.logger.error(e)
                    if not write_head:
                        # Skip writing the header to the queue, if requested.
                        continue
                else:
                    # FOOT
                    continue
                self.write_to_queue(block, self.logger, human_readable)
        try:
            self.logger.raise_if_fatal_errors_found()
        except error.ReportValidationFailure as e:
            # Report the failure to the user on stderr; the logger is still
            # returned so the caller can inspect it.
            sys.stderr.write(constants.COLOR_RED + constants.BOLD +
                             '\n[Cell validation] ' + str(e) + constants.ENDC)
        return self.logger
Esempio n. 3
0
 def _get_file_parser(self, row_validators=None):
   """Builds a DSRFFileParser for the placeholder path 'filename'.

   Args:
     row_validators: Row validators to install on the parser. When omitted
       (or otherwise falsy), self.row_validators_list is used instead.

   Returns:
     The configured dsrf_file_parser.DSRFFileParser object.
   """
   file_parser = dsrf_file_parser.DSRFFileParser(
       self.logger, None, None, 'filename')
   if row_validators:
     file_parser.row_validators_list = row_validators
   else:
     # Fall back to the validators prepared on this object.
     file_parser.row_validators_list = self.row_validators_list
   return file_parser