def testMultipleRequests(self):
    """Tests restoring the reader state across multiple requests."""
    input_file = files.blobstore.create()

    # Create a file with two records.
    with files.open(input_file, "a") as f:
      with records.RecordsWriter(f) as w:
        proto = file_service_pb.KeyValues()
        proto.set_key("key2")
        proto.value_list().extend(["a", "b"])
        proto.set_partial(True)
        w.write(proto.Encode())

        proto = file_service_pb.KeyValues()
        proto.set_key("key2")
        proto.value_list().extend(["c", "d"])
        w.write(proto.Encode())

    files.finalize(input_file)
    input_file = files.blobstore.get_file_name(
        files.blobstore.get_blob_key(input_file))

    # Now read the records in two attempts, serializing and recreating the
    # input reader as if it's a separate request.
    reader = mapreduce_pipeline._ReducerReader([input_file], 0)
    it = iter(reader)
    self.assertEquals(input_readers.ALLOW_CHECKPOINT, it.next())

    reader_state = reader.to_json()
    other_reader = mapreduce_pipeline._ReducerReader.from_json(reader_state)
    it = iter(reader)
    self.assertEquals(("key2", ["a", "b", "c", "d"]), it.next())
  def testSingleRequest(self):
    """Tests when a key can be handled during a single request."""
    input_file = files.blobstore.create()

    with files.open(input_file, "a") as f:
      with records.RecordsWriter(f) as w:
        # First record is full
        proto = file_service_pb.KeyValues()
        proto.set_key("key1")
        proto.value_list().extend(["a", "b"])
        w.write(proto.Encode())
        # Second record is partial
        proto = file_service_pb.KeyValues()
        proto.set_key("key2")
        proto.value_list().extend(["a", "b"])
        proto.set_partial(True)
        w.write(proto.Encode())
        proto = file_service_pb.KeyValues()
        proto.set_key("key2")
        proto.value_list().extend(["c", "d"])
        w.write(proto.Encode())

    files.finalize(input_file)
    input_file = files.blobstore.get_file_name(
        files.blobstore.get_blob_key(input_file))

    reader = mapreduce_pipeline._ReducerReader([input_file], 0)
    self.assertEquals(
        [("key1", ["a", "b"]),
         input_readers.ALLOW_CHECKPOINT,
         ("key2", ["a", "b", "c", "d"])],
        list(reader))

    # now test state serialization
    reader = mapreduce_pipeline._ReducerReader([input_file], 0)
    i = reader.__iter__()
    self.assertEquals(
        {"position": 0,
         "current_values": "Ti4=",
         "current_key": "Ti4=",
         "filenames": [input_file]},
        reader.to_json())

    self.assertEquals(("key1", ["a", "b"]), i.next())
    self.assertEquals(
        {"position": 19,
         "current_values": "Ti4=",
         "current_key": "Ti4=",
         "filenames": [input_file]},
        reader.to_json())

    self.assertEquals(input_readers.ALLOW_CHECKPOINT, i.next())
    self.assertEquals(
        {"position": 40,
         "current_values": "KGxwMApTJ2EnCnAxCmFTJ2InCnAyCmEu",
         "current_key": "UydrZXkyJwpwMAou",
         "filenames": [input_file]},
        reader.to_json())

    self.assertEquals(("key2", ["a", "b", "c", "d"]), i.next())
    self.assertEquals(
        {"position": 59,
         "current_values": "Ti4=",
         "current_key": "Ti4=",
         "filenames": [input_file]},
        reader.to_json())

    try:
      i.next()
      self.fail("Exception expected")
    except StopIteration:
      # expected
      pass

    # now do test deserialization at every moment.
    reader = mapreduce_pipeline._ReducerReader([input_file], 0)
    i = reader.__iter__()
    reader = mapreduce_pipeline._ReducerReader.from_json(reader.to_json())

    self.assertEquals(("key1", ["a", "b"]), i.next())
    reader = mapreduce_pipeline._ReducerReader.from_json(reader.to_json())

    self.assertEquals(input_readers.ALLOW_CHECKPOINT, i.next())
    reader = mapreduce_pipeline._ReducerReader.from_json(reader.to_json())

    self.assertEquals(("key2", ["a", "b", "c", "d"]), i.next())
    reader = mapreduce_pipeline._ReducerReader.from_json(reader.to_json())

    try:
      i.next()
      self.fail("Exception expected")
    except StopIteration:
      # expected
      pass
Example #3
0
  def testReadPartial(self):
    input_file = files.blobstore.create()

    with files.open(input_file, "a") as f:
      with records.RecordsWriter(f) as w:
        # First record is full
        proto = file_service_pb.KeyValues()
        proto.set_key("key1")
        proto.value_list().extend(["a", "b"])
        w.write(proto.Encode())
        # Second record is partial
        proto = file_service_pb.KeyValues()
        proto.set_key("key2")
        proto.value_list().extend(["a", "b"])
        proto.set_partial(True)
        w.write(proto.Encode())
        proto = file_service_pb.KeyValues()
        proto.set_key("key2")
        proto.value_list().extend(["c", "d"])
        w.write(proto.Encode())

    files.finalize(input_file)
    input_file = files.blobstore.get_file_name(
        files.blobstore.get_blob_key(input_file))

    reader = mapreduce_pipeline._ReducerReader([input_file], 0)
    self.assertEquals(
        [("key1", ["a", "b"]),
         input_readers.ALLOW_CHECKPOINT,
         ("key2", ["a", "b", "c", "d"])],
        list(reader))

    # now test state serialization
    reader = mapreduce_pipeline._ReducerReader([input_file], 0)
    i = reader.__iter__()
    self.assertEquals(
        {"position": 0,
         "current_values": None,
         "current_key": None,
         "filenames": [input_file]},
        reader.to_json())

    self.assertEquals(("key1", ["a", "b"]), i.next())
    self.assertEquals(
        {"position": 19,
         "current_values": None,
         "current_key": None,
         "filenames": [input_file]},
        reader.to_json())

    self.assertEquals(input_readers.ALLOW_CHECKPOINT, i.next())
    self.assertEquals(
        {"position": 40,
         "current_values": ["a", "b"],
         "current_key": "key2",
         "filenames": [input_file]},
        reader.to_json())

    self.assertEquals(("key2", ["a", "b", "c", "d"]), i.next())
    self.assertEquals(
        {"position": 59,
         "current_values": None,
         "current_key": None,
         "filenames": [input_file]},
        reader.to_json())

    try:
      i.next()
      self.fail("Exception expected")
    except StopIteration:
      # expected
      pass

    # now do test deserialization at every moment.
    reader = mapreduce_pipeline._ReducerReader([input_file], 0)
    i = reader.__iter__()
    reader = mapreduce_pipeline._ReducerReader.from_json(reader.to_json())

    self.assertEquals(("key1", ["a", "b"]), i.next())
    reader = mapreduce_pipeline._ReducerReader.from_json(reader.to_json())

    self.assertEquals(input_readers.ALLOW_CHECKPOINT, i.next())
    reader = mapreduce_pipeline._ReducerReader.from_json(reader.to_json())

    self.assertEquals(("key2", ["a", "b", "c", "d"]), i.next())
    reader = mapreduce_pipeline._ReducerReader.from_json(reader.to_json())

    try:
      i.next()
      self.fail("Exception expected")
    except StopIteration:
      # expected
      pass