def write(self, data, ctx):
  """Write a single key/value pair to one of the output files.

  The pair is serialized as a KeyValue proto and appended to a RecordsPool
  chosen by hashing the key, so equal keys always land in the same file.

  Args:
    data: actual data yielded from handler. Expected to be a 2-tuple of
      (key, value); anything else is logged and skipped.
    ctx: an instance of context.Context.
  """
  if len(data) != 2:
    logging.error("Got bad tuple of length %d (2-tuple expected): %s",
                  len(data), data)
    # Malformed input: bail out instead of processing a bad tuple below.
    return
  try:
    key = str(data[0])
    value = str(data[1])
  except TypeError:
    logging.error("Expecting a tuple, but got %s: %s",
                  data.__class__.__name__, data)
    # key/value were never bound; continuing would raise NameError.
    return

  # Pick the destination file by key hash so identical keys are co-located.
  file_index = hash(key) % len(self._filenames)
  pool_name = "kv_pool%d" % file_index
  filename = self._filenames[file_index]

  # Lazily create one RecordsPool per output file and cache it on the
  # context so subsequent writes reuse it.
  if ctx.get_pool(pool_name) is None:
    ctx.register_pool(
        pool_name,
        output_writers.RecordsPool(filename=filename, ctx=ctx))

  proto = file_service_pb.KeyValue()
  proto.set_key(key)
  proto.set_value(value)
  ctx.get_pool(pool_name).append(proto.Encode())
def _sort_records(records):
  """Map function sorting records.

  Converts records to KeyValue protos, sorts them by key and writes them
  into a new blobstore file. Creates an _OutputFile entity to record the
  resulting file name.

  Args:
    records: list of records which are serialized KeyValue protos.
  """
  ctx = context.get()

  logging.debug("Parsing")
  proto_records = []
  for record in records:
    proto = file_service_pb.KeyValue()
    proto.ParseFromString(record)
    proto_records.append(proto)

  logging.debug("Sorting")
  # Python 2 cmp-style sort; _compare_keys orders protos by key.
  proto_records.sort(cmp=_compare_keys)

  logging.debug("Writing")
  blob_file_name = (ctx.mapreduce_spec.name + "-" +
                    ctx.mapreduce_id + "-output")
  output_path = files.blobstore.create(
      _blobinfo_uploaded_filename=blob_file_name)
  with output_writers.RecordsPool(output_path, ctx=ctx) as pool:
    for proto in proto_records:
      pool.append(proto.Encode())

  logging.debug("Finalizing")
  files.finalize(output_path)
  # NOTE(review): presumably a grace period for blobstore finalization to
  # propagate before the final file name can be resolved — confirm whether
  # this sleep is still required.
  time.sleep(1)
  output_path = files.blobstore.get_file_name(
      files.blobstore.get_blob_key(output_path))

  # Record the finalized file so the reduce phase can find it.
  entity = _OutputFile(key_name=output_path,
                       parent=_OutputFile.get_root_key(ctx.mapreduce_id))
  entity.put()