def create_operation(operation_name, spec, counter_factory, step_name,
                     state_sampler, test_shuffle_source=None,
                     test_shuffle_sink=None, is_streaming=False):
  """Create Operation object for given operation specification."""
  if isinstance(spec, operation_specs.WorkerRead):
    if isinstance(spec.source, iobase.SourceBundle):
      op = ReadOperation(
          operation_name, spec, counter_factory, state_sampler)
    else:
      from dataflow_worker.native_operations import NativeReadOperation
      op = NativeReadOperation(
          operation_name, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerWrite):
    from dataflow_worker.native_operations import NativeWriteOperation
    op = NativeWriteOperation(
        operation_name, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerCombineFn):
    op = CombineOperation(
        operation_name, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerPartialGroupByKey):
    op = create_pgbk_op(operation_name, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerDoFn):
    op = DoOperation(operation_name, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerGroupingShuffleRead):
    from dataflow_worker.shuffle_operations import GroupedShuffleReadOperation
    op = GroupedShuffleReadOperation(
        operation_name, spec, counter_factory, state_sampler,
        shuffle_source=test_shuffle_source)
  elif isinstance(spec, operation_specs.WorkerUngroupedShuffleRead):
    from dataflow_worker.shuffle_operations import UngroupedShuffleReadOperation
    op = UngroupedShuffleReadOperation(
        operation_name, spec, counter_factory, state_sampler,
        shuffle_source=test_shuffle_source)
  elif isinstance(spec, operation_specs.WorkerInMemoryWrite):
    op = InMemoryWriteOperation(
        operation_name, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerShuffleWrite):
    from dataflow_worker.shuffle_operations import ShuffleWriteOperation
    op = ShuffleWriteOperation(
        operation_name, spec, counter_factory, state_sampler,
        shuffle_sink=test_shuffle_sink)
  elif isinstance(spec, operation_specs.WorkerFlatten):
    op = FlattenOperation(
        operation_name, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerMergeWindows):
    from dataflow_worker.shuffle_operations import BatchGroupAlsoByWindowsOperation
    from dataflow_worker.shuffle_operations import StreamingGroupAlsoByWindowsOperation
    if is_streaming:
      op = StreamingGroupAlsoByWindowsOperation(
          operation_name, spec, counter_factory, state_sampler)
    else:
      op = BatchGroupAlsoByWindowsOperation(
          operation_name, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerReifyTimestampAndWindows):
    from dataflow_worker.shuffle_operations import ReifyTimestampAndWindowsOperation
    op = ReifyTimestampAndWindowsOperation(
        operation_name, spec, counter_factory, state_sampler)
  else:
    raise TypeError(
        'Expected an instance of operation_specs.Worker* class '
        'instead of %s' % (spec, ))
  op.step_name = step_name
  op.metrics_container = MetricsContainer(step_name)
  op.scoped_metrics_container = ScopedMetricsContainer(op.metrics_container)
  return op

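# --- Hedged usage sketch (not part of the worker code) ---------------------
# The legacy factory above finishes by attaching per-step metric state to
# whatever Operation it built. The snippet below reproduces only that
# bookkeeping with a hypothetical stand-in class; it assumes a Beam release
# contemporaneous with this code, where MetricsContainer and
# ScopedMetricsContainer both live in apache_beam.metrics.execution.
from apache_beam.metrics.execution import MetricsContainer
from apache_beam.metrics.execution import ScopedMetricsContainer


class _FakeOperation(object):
  """Hypothetical stand-in for the Operation subclasses dispatched above."""


def _attach_step_metrics(op, step_name):
  # Same three assignments that the legacy create_operation performs
  # just before returning the operation.
  op.step_name = step_name
  op.metrics_container = MetricsContainer(step_name)
  op.scoped_metrics_container = ScopedMetricsContainer(op.metrics_container)
  return op


_op = _attach_step_metrics(_FakeOperation(), 's3')
assert _op.metrics_container.step_name == 's3'
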
def create_operation(name_context, spec, counter_factory, step_name,
                     state_sampler, test_shuffle_source=None,
                     test_shuffle_sink=None, is_streaming=False):
  """Create Operation object for given operation specification."""
  if not isinstance(name_context, common.NameContext):
    # TODO(BEAM-4028): Remove ad-hoc NameContext once all has been migrated.
    name_context = common.DataflowNameContext(step_name=name_context,
                                              user_name=step_name,
                                              system_name=None)
  if isinstance(spec, operation_specs.WorkerRead):
    if isinstance(spec.source, iobase.SourceBundle):
      op = ReadOperation(
          name_context, spec, counter_factory, state_sampler)
    else:
      from dataflow_worker.native_operations import NativeReadOperation
      op = NativeReadOperation(
          name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerWrite):
    from dataflow_worker.native_operations import NativeWriteOperation
    op = NativeWriteOperation(
        name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerCombineFn):
    op = CombineOperation(
        name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerPartialGroupByKey):
    op = create_pgbk_op(name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerDoFn):
    op = DoOperation(name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerGroupingShuffleRead):
    from dataflow_worker.shuffle_operations import GroupedShuffleReadOperation
    op = GroupedShuffleReadOperation(
        name_context, spec, counter_factory, state_sampler,
        shuffle_source=test_shuffle_source)
  elif isinstance(spec, operation_specs.WorkerUngroupedShuffleRead):
    from dataflow_worker.shuffle_operations import UngroupedShuffleReadOperation
    op = UngroupedShuffleReadOperation(
        name_context, spec, counter_factory, state_sampler,
        shuffle_source=test_shuffle_source)
  elif isinstance(spec, operation_specs.WorkerInMemoryWrite):
    op = InMemoryWriteOperation(
        name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerShuffleWrite):
    from dataflow_worker.shuffle_operations import ShuffleWriteOperation
    op = ShuffleWriteOperation(
        name_context, spec, counter_factory, state_sampler,
        shuffle_sink=test_shuffle_sink)
  elif isinstance(spec, operation_specs.WorkerFlatten):
    op = FlattenOperation(
        name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerMergeWindows):
    from dataflow_worker.shuffle_operations import BatchGroupAlsoByWindowsOperation
    from dataflow_worker.shuffle_operations import StreamingGroupAlsoByWindowsOperation
    if is_streaming:
      op = StreamingGroupAlsoByWindowsOperation(
          name_context, spec, counter_factory, state_sampler)
    else:
      op = BatchGroupAlsoByWindowsOperation(
          name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerReifyTimestampAndWindows):
    from dataflow_worker.shuffle_operations import ReifyTimestampAndWindowsOperation
    op = ReifyTimestampAndWindowsOperation(
        name_context, spec, counter_factory, state_sampler)
  else:
    raise TypeError(
        'Expected an instance of operation_specs.Worker* class '
        'instead of %s' % (spec, ))
  return op

def create_operation(name_context, spec, counter_factory, step_name=None,
                     state_sampler=None, test_shuffle_source=None,
                     test_shuffle_sink=None, is_streaming=False):
  # type: (...) -> Operation
  """Create Operation object for given operation specification."""
  # TODO(pabloem): Document arguments to this function call.
  if not isinstance(name_context, common.NameContext):
    name_context = common.NameContext(step_name=name_context)

  if isinstance(spec, operation_specs.WorkerRead):
    if isinstance(spec.source, iobase.SourceBundle):
      op = ReadOperation(
          name_context, spec, counter_factory,
          state_sampler)  # type: Operation
    else:
      from dataflow_worker.native_operations import NativeReadOperation
      op = NativeReadOperation(
          name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerWrite):
    from dataflow_worker.native_operations import NativeWriteOperation
    op = NativeWriteOperation(
        name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerCombineFn):
    op = CombineOperation(
        name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerPartialGroupByKey):
    op = create_pgbk_op(name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerDoFn):
    op = DoOperation(name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerGroupingShuffleRead):
    from dataflow_worker.shuffle_operations import GroupedShuffleReadOperation
    op = GroupedShuffleReadOperation(
        name_context, spec, counter_factory, state_sampler,
        shuffle_source=test_shuffle_source)
  elif isinstance(spec, operation_specs.WorkerUngroupedShuffleRead):
    from dataflow_worker.shuffle_operations import UngroupedShuffleReadOperation
    op = UngroupedShuffleReadOperation(
        name_context, spec, counter_factory, state_sampler,
        shuffle_source=test_shuffle_source)
  elif isinstance(spec, operation_specs.WorkerInMemoryWrite):
    op = InMemoryWriteOperation(
        name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerShuffleWrite):
    from dataflow_worker.shuffle_operations import ShuffleWriteOperation
    op = ShuffleWriteOperation(
        name_context, spec, counter_factory, state_sampler,
        shuffle_sink=test_shuffle_sink)
  elif isinstance(spec, operation_specs.WorkerFlatten):
    op = FlattenOperation(
        name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerMergeWindows):
    from dataflow_worker.shuffle_operations import BatchGroupAlsoByWindowsOperation
    from dataflow_worker.shuffle_operations import StreamingGroupAlsoByWindowsOperation
    if is_streaming:
      op = StreamingGroupAlsoByWindowsOperation(
          name_context, spec, counter_factory, state_sampler)
    else:
      op = BatchGroupAlsoByWindowsOperation(
          name_context, spec, counter_factory, state_sampler)
  elif isinstance(spec, operation_specs.WorkerReifyTimestampAndWindows):
    from dataflow_worker.shuffle_operations import ReifyTimestampAndWindowsOperation
    op = ReifyTimestampAndWindowsOperation(
        name_context, spec, counter_factory, state_sampler)
  else:
    raise TypeError(
        'Expected an instance of operation_specs.Worker* class '
        'instead of %s' % (spec, ))
  return op
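
# --- Hedged usage sketch (not part of the worker code) ---------------------
# The final version above accepts either a bare step-name string or an
# already-constructed common.NameContext and normalizes to the latter. The
# helper below mirrors only that coercion step; it assumes NameContext lives
# in apache_beam.runners.common and exposes a step_name attribute, and it
# sidesteps the spec/counter_factory/state_sampler machinery that a real
# create_operation call would need.
from apache_beam.runners import common


def _to_name_context(name_context):
  # Same wrapping performed at the top of create_operation.
  if not isinstance(name_context, common.NameContext):
    name_context = common.NameContext(step_name=name_context)
  return name_context


assert _to_name_context('s1').step_name == 's1'
assert _to_name_context(common.NameContext(step_name='s2')).step_name == 's2'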