Beispiel #1
0
  def parse_file(self, file_number):
    """Parses the file into protocol buffer block objects.

    The file at self.file_path is opened with gzip.open when
    self.is_compressed() is true and with the built-in open otherwise, and its
    rows are read with csv.reader using TsvDialect. Rows whose first cell
    starts with constants.COMMENT_SIGN are skipped. When handling a row raises
    error.ValidationError, the error is logged and parsing moves on to the
    next row.

    The input file is closed as soon as the last row has been read, or when
    the generator is closed or garbage collected before that point.

    Args:
      file_number: The file number in the report (eg. "3of4" -> 3).

    Yields:
      Each yield is a single block object (block_pb2.Block).
    """
    row_number = 0
    block_number = 0
    opener = gzip.open if self.is_compressed() else open
    # The context manager releases the file handle even when the consumer stops
    # iterating early or an unexpected exception escapes the loop.
    # NOTE(review): the 'rU' mode is kept as-is; Python 3.11+ rejects 'U' in
    # open() -- confirm which interpreter versions must be supported.
    with opener(self.file_path, 'rU') as tsv:
      current_block = block_pb2.Block(file_number=file_number)
      self.logger.info(
          'Start parsing the HEAD block in file number %s.', file_number)
      for line in csv.reader(tsv, dialect=TsvDialect):
        row_number += 1
        # Comment row.
        # NOTE(review): csv.reader yields [] for a blank line, which would
        # raise IndexError here -- confirm inputs never contain blank lines.
        if line[0].startswith(constants.COMMENT_SIGN):
          continue
        try:
          row_type = self._get_row_type(line, row_number)
          # End of block check: hand the finished block to the caller and
          # start collecting rows into a fresh one.
          if self.is_end_of_block(
              line, row_type, row_number, current_block):
            yield current_block
            current_block = block_pb2.Block(file_number=file_number)
          # HEAD/FOOT row.
          if (constants.HEADER_ROW_PATTERN.match(row_type) or
              row_type in constants.FOOT_ROWS):
            current_block.type = block_pb2.HEAD
            if row_type in constants.FOOT_ROWS:
              self.logger.info(
                  'Start parsing the FOOT block in file number %s.',
                  file_number)
              current_block.type = block_pb2.FOOT
            if row_type == 'HEAD':
              # The HEAD row selects the row validators used from here on and
              # carries the version in its second cell.
              self.row_validators_list = self.get_row_validators(line)
              current_block.version = line[1]
            current_block.rows.extend([
                self.get_row_object(
                    line, row_type, row_number, block_number)])
            continue
          # Body row.
          block_number = self.get_block_number(line, row_number)
          row = self.get_row_object(line, row_type, row_number, block_number)
          if not current_block.type:
            # First row of a new body block: record its type and number.
            current_block.type = block_pb2.BODY
            current_block.number = block_number
          current_block.rows.extend([row])
        except error.ValidationError as e:
          self.logger.error(e)

    # The block still being collected when the input ends is yielded last,
    # after the file has been closed.
    yield current_block
Beispiel #2
0
  def read_blocks_from_queue(self):
    """Yields the blocks read from the queue on standard input.

    Lines from sys.stdin are collected until a line containing
    constants.QUEUE_DELIMITER is seen. The collected lines are then joined with
    newlines and parsed as one serialized block. If the payload can not be
    decoded, an error message is written to stderr and sys.exit(-1) is called.

    Override this method if you wish to change the queue (blocks
    transformation) form.

    Yields:
      Each yield is a single block object (block_pb2.Block).
    """
    pending_lines = []
    for queue_line in sys.stdin:
      # Not a delimiter: keep accumulating the current message.
      if constants.QUEUE_DELIMITER not in queue_line:
        pending_lines.append(
            queue_line.rstrip(bytes('\n', encoding='utf8')))
        continue

      # Delimiter reached: everything collected so far is one block.
      serialized = b'\n'.join(pending_lines)
      parsed_block = block_pb2.Block()
      try:
        parsed_block.ParseFromString(serialized)
      except message_mod.DecodeError:
        sys.stderr.write(
            'ERROR: Can not read protocol buffer from queue. Is '
            'human_readable perhaps set to true? I am not a human. '
            'Aborting...\n')
        sys.exit(-1)

      yield parsed_block
      pending_lines = []
def _create_test_block(row_types):
    """Builds a BODY block (number 0, file number 1) with one row per type.

    Rows are added in the order of row_types and are numbered consecutively
    starting at 1.
    """
    body_block = block_pb2.Block(
        type=block_pb2.BODY, number=0, file_number=1)
    for position, kind in enumerate(row_types, start=1):
        new_row = body_block.rows.add()
        new_row.type = kind
        new_row.row_number = position
    return body_block
 def block_from_ascii(cls, text):
     """Builds a new Block protobuf by merging the given ASCII text into it."""
     parsed = block_pb2.Block()
     text_format.Merge(text, parsed)
     return parsed
Beispiel #5
0
 def test_is_end_of_block_true(self):
   # An SU02 row checked against a freshly created Block must be reported as
   # the end of the block.
   su02_row = ['SU02', 'BL8', '11', 'SR1', 'AdSupport', 'NonInterStream']
   empty_block = block_pb2.Block()
   file_parser = self._get_file_parser()
   ended = file_parser.is_end_of_block(su02_row, 'SU02', 5, empty_block)
   self.assertTrue(ended)
Beispiel #6
0
 def test_is_end_of_block_false(self):
   # A HEAD row checked against a freshly created Block must not be reported
   # as the end of the block.
   head_row = ['HEAD', '123']
   empty_block = block_pb2.Block()
   file_parser = self._get_file_parser()
   ended = file_parser.is_end_of_block(head_row, 'HEAD', 5, empty_block)
   self.assertFalse(ended)