def get_config(prng=False,
               ipu_id=-1,
               shards=1,
               number_of_replicas=1,
               max_cross_replica_buffer_size=10 * 1024 * 1024,
               merge_infeed_io_copies=True,
               fp_exceptions=True,
               xla_recompute=False,
               seed=None,
               profile=None,
               availableMemoryProportion=None,
               stable_norm=False):
    """Builds ipu_options"""
    profile_exec_modes = {"NO_PROFILE": ExecutionProfileType.NO_PROFILE,
                          "TILE_PROFILE": ExecutionProfileType.TILE_PROFILE,
                          "DEVICE_PROFILE": ExecutionProfileType.DEVICE_PROFILE,
                          "IPU_PROFILE": ExecutionProfileType.IPU_PROFILE}
    config = utils.create_ipu_config(
        max_cross_replica_sum_buffer_size=max_cross_replica_buffer_size,
        merge_infeed_io_copies=merge_infeed_io_copies,
        always_rearrange_copies_on_the_host=False,
        profiling=profile is not None,
        profile_execution=profile_exec_modes[profile] if profile else None)

    if "GCL_REAL_COLLECTIVES" in os.environ:
        config = utils.set_gcl_options(config,
                                       num_io_tiles=128,
                                       gcl_options={"useGclCollectives": "true"})

    if ipu_id == -1:
        config = utils.auto_select_ipus(config, number_of_replicas * shards)
    else:
        config = utils.select_ipus(config, [ipu_id])

    config = utils.set_compilation_options(config, {
        "device.clearAtomicFlagAfterExchange": "false",
        "prng.enable": "true" if prng else "false",
        "target.deterministicWorkers": "false" if seed is None else "true",
    })

    if availableMemoryProportion is not None:
        config = utils.set_convolution_options(config, {
            "availableMemoryProportion": str(availableMemoryProportion)
        })

    if stable_norm:
        config = utils.set_norm_options(config, use_stable_statistics=True)

    if xla_recompute:
        config = utils.set_recomputation_options(config, allow_recompute=True)

    config = utils.set_floating_point_behaviour_options(config,
                                                        inv=fp_exceptions,
                                                        div0=fp_exceptions,
                                                        oflo=fp_exceptions,
                                                        esr=prng,
                                                        nanoo=True)
    return config

def get_config(prng=False,
               ipu_id=-1,
               shards=1,
               number_of_replicas=1,
               max_cross_replica_buffer_size=10 * 1024 * 1024,
               merge_infeed_io_copies=True,
               fp_exceptions=True,
               xla_recompute=False,
               seed=None,
               profile=False,
               availableMemoryProportion=None):
    """Builds ipu_options"""
    config = utils.create_ipu_config(
        max_cross_replica_sum_buffer_size=max_cross_replica_buffer_size,
        merge_infeed_io_copies=merge_infeed_io_copies,
        always_rearrange_copies_on_the_host=False,
        profiling=profile,
        profile_execution=profile)

    if ipu_id == -1:
        config = utils.auto_select_ipus(config, number_of_replicas * shards)
    else:
        config = utils.select_ipus(config, [ipu_id])

    config = utils.set_compilation_options(config, {
        "device.clearAtomicFlagAfterExchange": "false",
        "prng.enable": "true" if prng else "false",
        "target.deterministicWorkers": "false" if seed is None else "true",
    })

    if availableMemoryProportion is not None:
        config = utils.set_convolution_options(config, {
            "availableMemoryProportion": str(availableMemoryProportion)
        })

    if xla_recompute:
        config = utils.set_recomputation_options(config, allow_recompute=True)

    config = utils.set_floating_point_behaviour_options(config,
                                                        inv=fp_exceptions,
                                                        div0=fp_exceptions,
                                                        oflo=fp_exceptions,
                                                        esr=prng,
                                                        nanoo=True)
    return config

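# A minimal usage sketch for the config builders above (assumed calling
# context, not part of the original examples; the function name is
# illustrative): build the options once, then apply them with
# utils.configure_ipu_system, and optionally seed the hardware RNG via
# utils.reset_ipu_seed, mirroring the pattern used by the train/infer
# graph builders further down.
def configure_ipu_example(seed=42):
    config = get_config(prng=True,             # enable stochastic rounding
                        number_of_replicas=2,  # auto-selects replicas * shards IPUs
                        fp_exceptions=False,
                        seed=seed)
    utils.configure_ipu_system(config)
    if seed is not None:
        utils.reset_ipu_seed(seed)
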
def get_ipu_config(fp_exceptions=True,
                   stochastic_rounding=True,
                   xla_recompute=False,
                   available_memory_proportion=None,
                   disable_graph_outlining=False,
                   num_ipus_required=0,
                   max_cross_replica_sum_buffer_size=0,
                   scheduler_selection='',
                   compile_only=False,
                   partials_type="half"):
    """Builds ipu_options"""
    config = utils.create_ipu_config(
        max_report_size=3001819596000,
        merge_infeed_io_copies=True,
        always_rearrange_copies_on_the_host=False,
        selection_order=utils.SelectionOrder.AUTO,
        disable_graph_outlining=disable_graph_outlining,
        max_cross_replica_sum_buffer_size=max_cross_replica_sum_buffer_size,
        scheduler_selection=scheduler_selection)

    config = utils.auto_select_ipus(config, num_ipus_required)
    config = utils.set_matmul_options(config, clear_pass_type=True)

    if available_memory_proportion is not None:
        config = utils.set_convolution_options(config, {
            "availableMemoryProportion": str(available_memory_proportion),
            "partialsType": partials_type
        })
        config = utils.set_matmul_options(config, {
            "availableMemoryProportion": str(available_memory_proportion),
            "partialsType": partials_type
        })

    config = utils.set_norm_options(config, use_stable_statistics=True)
    config = utils.set_recomputation_options(config, allow_recompute=xla_recompute)

    if compile_only:
        config = utils.set_ipu_connection_type(config,
                                               utils.DeviceConnectionType.NEVER,
                                               ipu_version=2,
                                               enable_remote_buffers=True)

    config = utils.set_floating_point_behaviour_options(config,
                                                        inv=fp_exceptions,
                                                        div0=fp_exceptions,
                                                        oflo=fp_exceptions,
                                                        esr=stochastic_rounding,
                                                        nanoo=fp_exceptions)
    return config

def generic_train_graph(opts, is_training):
    data_type = 'float32'
    train_graph = tf.Graph()
    with train_graph.as_default():
        placeholders = {}
        placeholders["learning_rate"] = tf.compat.v1.placeholder(data_type, shape=[])
        # `seed` is expected to be defined at module level.
        uid_embedding, mid_embedding, cat_embedding = id_embedding(opts, is_training, seed)

        if opts['use_synthetic_data']:
            dataset_train = get_synthetic_dataset(opts)
        else:
            dataset_train = get_dataset_embed(opts, is_training=True)

        infeed_train = ipu_infeed_queue.IPUInfeedQueue(
            dataset_train,
            feed_name='DIN_dataset_infeed_train',
            replication_factor=opts['replicas'])

        with ipu_scope('/device:IPU:0'):
            def comp_fn():
                def body(total_loss, total_aux_loss, total_accuracy,
                         uids, mids, cats, mid_his, cat_his, mid_mask,
                         target, seqlen):
                    prob, loss, aux_loss, accuracy, grad_op = graph_builder(
                        opts, uid_embedding, mid_embedding, cat_embedding,
                        placeholders['learning_rate'], uids, mids, cats,
                        mid_his, cat_his, mid_mask, target, seqlen,
                        use_negsampling=False)
                    with tf.control_dependencies([grad_op]):
                        return (total_loss + loss,
                                total_aux_loss + aux_loss,
                                total_accuracy + accuracy)

                return loops.repeat(opts['batches_per_step'],
                                    body,
                                    [tf.constant(0, np.float32)] * 3,
                                    infeed_train)

            outputs_train = ipu_compiler.compile(comp_fn, [])
            avg_loss, avg_aux_loss, avg_accuracy = [
                x / opts['batches_per_step'] for x in outputs_train
            ]
            outfeed = None

        saver = tf.compat.v1.train.Saver()
        utils.move_variable_initialization_to_cpu()
        init = tf.compat.v1.global_variables_initializer()

    if opts['use_ipu_model']:
        os.environ["TF_POPLAR_FLAGS"] = "--use_ipu_model"
    ipu_options = utils.create_ipu_config()
    ipu_options = utils.set_optimization_options(ipu_options,
                                                 combine_embedding_lookups=True)
    ipu_options = utils.set_recomputation_options(ipu_options, allow_recompute=True)
    ipu_options = utils.auto_select_ipus(ipu_options, [opts['replicas']])
    utils.configure_ipu_system(ipu_options)
    if seed is not None:
        utils.reset_ipu_seed(seed)

    ops_train = [avg_loss, avg_aux_loss, avg_accuracy]
    sess = tf.compat.v1.Session(graph=train_graph)
    return GraphOps(sess, init, ops_train, placeholders, infeed_train,
                    outfeed, saver), uid_embedding, mid_embedding, cat_embedding

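# Hedged sketch of how generic_train_graph's return value might be driven
# (assumed calling code, not part of the original example; the GraphOps
# tuple is unpacked positionally in the order its constructor receives
# the fields above):
def run_train_steps_example(opts, learning_rate=0.1):
    graph_ops, _, _, _ = generic_train_graph(opts, is_training=True)
    sess, init, ops_train, placeholders, infeed_train, _, _ = graph_ops
    sess.run(init)
    sess.run(infeed_train.initializer)
    # One session.run executes opts['batches_per_step'] batches on device.
    loss, aux_loss, accuracy = sess.run(
        ops_train, feed_dict={placeholders["learning_rate"]: learning_rate})
    return loss, aux_loss, accuracy
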
def run_language_model(opts):
    if opts.random_seed is not None:
        utils.reset_ipu_seed(opts.random_seed)

    # Setup and acquire an IPU device:
    logging.info("Acquiring devices")
    if not opts.pipeline:
        opts.num_shards = 1  # FIX-ME enable sparse models using multiple shards

    # Make sure that no matter the number of shards/stages required, we always
    # acquire a power of 2 IPUs (else attachment will fail).
    k = 0
    while 2**k < opts.num_shards:
        k += 1
    num_ipus = 2**k
    logger.info(f"Need {opts.num_shards} IPUs, requesting {num_ipus}")

    config = utils.create_ipu_config()

    if opts.compile_only:
        if opts.compile_only_ipu_version is None:
            raise AttributeError(
                "Must provide --compile-only-ipu-version if --compile-only is set.")
        config = utils.set_ipu_connection_type(
            config,
            utils.DeviceConnectionType.NEVER,
            ipu_version=opts.compile_only_ipu_version,
            enable_remote_buffers=True)

    config = utils.auto_select_ipus(config, num_ipus)
    config = utils.set_recomputation_options(config, allow_recompute=opts.recompute)
    # Enable stochastic rounding
    config = utils.set_floating_point_behaviour_options(config,
                                                        inv=False,
                                                        div0=False,
                                                        oflo=False,
                                                        esr=True,
                                                        nanoo=False)
    config = sparse.set_system_config(
        config, custom_op_debug_printing=opts.debug_dense_grad)
    utils.configure_ipu_system(config)

    transformer = DynsparseTransformer(opts)
    if opts.mode in ["all", "train"]:
        run_training(opts, transformer)
    if opts.mode in ["all", "test"]:
        run_testing(opts, transformer)

def get_config(fp_exceptions,
               xla_recompute,
               disable_graph_outlining,
               num_required_ipus,
               enable_stochastic_rounding,
               max_cross_replica_sum_buffer_size,
               scheduler_selection,
               compile_only,
               ipu_id):
    """Builds ipu_options"""
    config = utils.create_ipu_config(
        merge_infeed_io_copies=True,
        always_rearrange_copies_on_the_host=False,
        disable_graph_outlining=disable_graph_outlining,
        selection_order=utils.SelectionOrder.AUTO,
        scheduler_selection=scheduler_selection)

    if ipu_id:
        config = utils.select_ipus(config, [ipu_id])
    else:
        config = utils.auto_select_ipus(config, num_required_ipus)

    config = utils.set_recomputation_options(config, allow_recompute=xla_recompute)

    # Simple way to skip the big `Transpose` operation due to bad allocation:
    # config = utils.set_matmul_options(config, clear_pass_type=True)

    config = utils.set_norm_options(config, use_stable_statistics=True)
    config = utils.set_floating_point_behaviour_options(
        config,
        inv=fp_exceptions,
        div0=fp_exceptions,
        oflo=fp_exceptions,
        esr=enable_stochastic_rounding,
        nanoo=fp_exceptions)
    config = utils.set_optimization_options(
        config,
        merge_remote_buffers=True,
        max_cross_replica_sum_buffer_size=max_cross_replica_sum_buffer_size)

    # Do not acquire a device, compile only.
    if compile_only:
        config = utils.set_ipu_connection_type(config,
                                               utils.DeviceConnectionType.NEVER,
                                               ipu_version=2,
                                               enable_remote_buffers=True)
    return config

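# Sketch of the compile-only flow for the builder above (illustrative
# argument values; assumed driver code, not part of the original): with
# DeviceConnectionType.NEVER the system is configured without attaching to
# hardware, so the graph can be compiled offline, e.g. on a host with no IPUs.
def offline_compile_example():
    config = get_config(fp_exceptions=False,
                        xla_recompute=True,
                        disable_graph_outlining=False,
                        num_required_ipus=4,
                        enable_stochastic_rounding=True,
                        max_cross_replica_sum_buffer_size=10 * 1024 * 1024,
                        scheduler_selection='',
                        compile_only=True,
                        ipu_id=None)  # falsy -> auto_select_ipus path
    utils.configure_ipu_system(config)
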
def get_config(prng=False,
               ipu_id=-1,
               shards=1,
               number_of_replicas=1,
               max_cross_replica_buffer_size=10 * 1024 * 1024,
               merge_infeed_io_copies=True,
               fp_exceptions=True,
               half_partials=False,
               conv_dithering=False,
               xla_recompute=False,
               seed=None,
               profile=None,
               availableMemoryProportion=None,
               stable_norm=False,
               internalExchangeOptimisationTarget=None,
               limitVertexState=None):
    """Builds ipu_options"""
    profile_exec_modes = {
        "NO_PROFILE": ExecutionProfileType.NO_PROFILE,
        "TILE_PROFILE": ExecutionProfileType.TILE_PROFILE,
        "DEVICE_PROFILE": ExecutionProfileType.DEVICE_PROFILE,
        "IPU_PROFILE": ExecutionProfileType.IPU_PROFILE
    }
    config = utils.create_ipu_config(
        merge_infeed_io_copies=merge_infeed_io_copies,
        always_rearrange_copies_on_the_host=False,
        profiling=profile is not None,
        profile_execution=profile_exec_modes[profile] if profile else None)
    config = utils.set_optimization_options(
        config, max_cross_replica_sum_buffer_size=max_cross_replica_buffer_size)

    if ipu_id == -1:
        config = utils.auto_select_ipus(config, number_of_replicas * shards)
    else:
        config = utils.select_ipus(config, [ipu_id])

    config = utils.set_compilation_options(config, {
        "device.clearAtomicFlagAfterExchange": "false",
        "prng.enable": "true" if prng else "false",
        "target.deterministicWorkers": "false" if seed is None else "portable",
    })

    if internalExchangeOptimisationTarget is not None:
        config = utils.set_compilation_options(config, {
            "opt.internalExchangeOptimisationTarget": internalExchangeOptimisationTarget
        })

    if limitVertexState is not None:
        config = utils.set_compilation_options(config, {
            "opt.limitVertexStateToLower256K": "true" if limitVertexState else "false"
        })

    if availableMemoryProportion is not None:
        config = utils.set_convolution_options(config, {
            "availableMemoryProportion": str(availableMemoryProportion)
        })

    if half_partials:
        config = utils.set_convolution_options(config, {"partialsType": 'half'})
        config = utils.set_matmul_options(config, {"partialsType": 'half'})

    if conv_dithering:
        config = utils.set_convolution_options(config, {"enableConvDithering": "true"})

    if stable_norm:
        config = utils.set_norm_options(config, use_stable_statistics=True)

    if xla_recompute:
        config = utils.set_recomputation_options(config, allow_recompute=True)

    config = utils.set_floating_point_behaviour_options(config,
                                                        inv=fp_exceptions,
                                                        div0=fp_exceptions,
                                                        oflo=fp_exceptions,
                                                        esr=prng,
                                                        nanoo=True)
    return config

def generic_infer_graph(opts, is_training):
    data_type = 'float32'
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        placeholders = {}
        placeholders["learning_rate"] = tf.compat.v1.placeholder(data_type, shape=[])
        # `seed` is expected to be defined at module level.
        uid_embedding, mid_embedding, cat_embedding = id_embedding(opts, is_training, seed)

        if opts['use_synthetic_data']:
            dataset_val = get_synthetic_dataset(opts)
        else:
            dataset_val = get_dataset_embed(opts, is_training=False)

        infeed_val = ipu_infeed_queue.IPUInfeedQueue(
            dataset_val,
            feed_name='DIN_dataset_infeed_val',
            replication_factor=opts['replicas'])
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name="DIN_validation_outfeed",
            replication_factor=opts['replicas'])

        with ipu_scope('/device:IPU:0'):
            def comp_fn_validate():
                def body(uids, mids, cats, mid_his, cat_his, mid_mask,
                         target, seqlen):
                    prob, loss_total, _, accuracy, _ = graph_builder(
                        opts, uid_embedding, mid_embedding, cat_embedding,
                        placeholders['learning_rate'], uids, mids, cats,
                        mid_his, cat_his, mid_mask, target, seqlen,
                        use_negsampling=False)
                    outfeed_op = outfeed_queue.enqueue((prob, target, accuracy))
                    return outfeed_op

                return loops.repeat(opts['batches_per_step'], body, [], infeed_val)

            outputs_val = ipu_compiler.compile(comp_fn_validate, [])
            outfeed = outfeed_queue.dequeue()

        saver = tf.compat.v1.train.Saver()
        utils.move_variable_initialization_to_cpu()
        init = tf.compat.v1.global_variables_initializer()

    if opts['use_ipu_model']:
        os.environ["TF_POPLAR_FLAGS"] = "--use_ipu_model"
    ipu_options = utils.create_ipu_config()
    ipu_options = utils.set_optimization_options(ipu_options,
                                                 combine_embedding_lookups=True)
    ipu_options = utils.set_recomputation_options(ipu_options, allow_recompute=True)
    ipu_options = utils.auto_select_ipus(ipu_options, [opts['replicas']])
    utils.configure_ipu_system(ipu_options)
    if seed is not None:
        utils.reset_ipu_seed(seed)

    ops_val = [outputs_val]
    sess = tf.compat.v1.Session(graph=infer_graph)
    return GraphOps(sess, init, ops_val, placeholders, infeed_val,
                    outfeed, saver), uid_embedding, mid_embedding, cat_embedding

def get_config(prng=False,
               ipu_id=-1,
               shards=1,
               number_of_replicas=1,
               max_cross_replica_buffer_size=10 * 1024 * 1024,
               merge_infeed_io_copies=True,
               fp_exceptions=True,
               half_partials=False,
               conv_dithering=False,
               xla_recompute=False,
               seed=None,
               profile=None,
               availableMemoryProportion=None,
               stable_norm=False,
               internalExchangeOptimisationTarget=None):
    """Builds ipu_options"""
    profile_exec_modes = {"NO_PROFILE": ExecutionProfileType.NO_PROFILE,
                          "TILE_PROFILE": ExecutionProfileType.TILE_PROFILE,
                          "DEVICE_PROFILE": ExecutionProfileType.DEVICE_PROFILE,
                          "IPU_PROFILE": ExecutionProfileType.IPU_PROFILE}
    config = utils.create_ipu_config(
        merge_infeed_io_copies=merge_infeed_io_copies,
        always_rearrange_copies_on_the_host=False,
        profiling=profile is not None,
        profile_execution=profile_exec_modes[profile] if profile else None)
    config = utils.set_optimization_options(
        config, max_cross_replica_sum_buffer_size=max_cross_replica_buffer_size)

    if "GCL_REAL_COLLECTIVES" in os.environ:
        # The GCL_NUM_IO_TILES environment variable sets how many tiles on the
        # IPU are reserved for Graphcore Communication Library (GCL) collectives.
        iotiles = int(os.environ['GCL_NUM_IO_TILES'])
        if iotiles % 2 or iotiles < 32 or iotiles > 192:
            raise ValueError(
                'GCL IO tiles must be an even number between 32 and 192, '
                'got {}.'.format(iotiles))
        config = utils.set_gcl_options(config,
                                       num_io_tiles=iotiles,
                                       gcl_options={"useGclCollectives": "true"})

    if ipu_id == -1:
        config = utils.auto_select_ipus(config, number_of_replicas * shards)
    else:
        config = utils.select_ipus(config, [ipu_id])

    config = utils.set_compilation_options(config, {
        "device.clearAtomicFlagAfterExchange": "false",
        "prng.enable": "true" if prng else "false",
        "target.deterministicWorkers": "false" if seed is None else "portable",
    })

    if internalExchangeOptimisationTarget is not None:
        config = utils.set_compilation_options(config, {
            "opt.internalExchangeOptimisationTarget": internalExchangeOptimisationTarget
        })

    if availableMemoryProportion is not None:
        config = utils.set_convolution_options(config, {
            "availableMemoryProportion": str(availableMemoryProportion)
        })

    if half_partials:
        config = utils.set_convolution_options(config, {"partialsType": 'half'})
        config = utils.set_matmul_options(config, {"partialsType": 'half'})

    if conv_dithering:
        config = utils.set_convolution_options(config, {"enableConvDithering": "true"})

    if stable_norm:
        config = utils.set_norm_options(config, use_stable_statistics=True)

    if xla_recompute:
        config = utils.set_recomputation_options(config, allow_recompute=True)

    config = utils.set_floating_point_behaviour_options(config,
                                                        inv=fp_exceptions,
                                                        div0=fp_exceptions,
                                                        oflo=fp_exceptions,
                                                        esr=prng,
                                                        nanoo=True)
    return config

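# The GCL branch above is driven purely by environment variables; a
# hypothetical invocation (values illustrative only, script name assumed)
# would look like:
#
#   GCL_REAL_COLLECTIVES=1 GCL_NUM_IO_TILES=64 python train.py ...
#
# where GCL_NUM_IO_TILES must be even and within [32, 192], otherwise the
# builder raises ValueError before configuring the system.
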
def generic_graph(opts):
    data_type = get_tf_datatype(opts)
    graph = tf.Graph()
    with graph.as_default():
        placeholders = {}
        placeholders["learning_rate"] = tf.placeholder(data_type, shape=[])
        uid_embedding, mid_embedding, cat_embedding = id_embedding(
            opts, True, opts['seed'])

        if opts['use_synthetic_data']:
            dataset = get_synthetic_dataset(opts, return_neg=True)
            feed_dict_values = {}
        else:
            dataset, feed_dict_values = get_dataset_embed_from_tensors(opts, data_type)

        infeed = ipu_infeed_queue.IPUInfeedQueue(
            dataset,
            feed_name='DIEN_dataset_infeed',
            replication_factor=opts['replicas'])

        with ipu_scope('/device:IPU:0'):
            def comp_fn():
                def body(total_loss, total_aux_loss, total_accuracy,
                         uids, mids, cats, mid_his, cat_his, mid_mask,
                         target, seqlen, noclk_mids, noclk_cats):
                    prob, loss, aux_loss, accuracy, grad_op = graph_builder(
                        opts, uid_embedding, mid_embedding, cat_embedding,
                        placeholders['learning_rate'], uids, mids, cats,
                        mid_his, cat_his, mid_mask, target, seqlen,
                        noclk_mids, noclk_cats, use_negsampling=True)
                    with tf.control_dependencies([grad_op]):
                        return (total_loss + loss,
                                total_aux_loss + aux_loss,
                                total_accuracy + accuracy)

                return loops.repeat(opts['batches_per_step'],
                                    body,
                                    [tf.constant(0, data_type)] * 3,
                                    infeed)

            outputs_train = ipu_compiler.compile(comp_fn, [])
            avg_loss, avg_aux_loss, avg_accuracy = [
                x / opts['batches_per_step'] for x in outputs_train
            ]

        saver = tf.train.Saver()
        utils.move_variable_initialization_to_cpu()
        init = tf.global_variables_initializer()

    if opts['use_ipu_model']:
        os.environ["TF_POPLAR_FLAGS"] = "--use_ipu_model"
    ipu_options = utils.create_ipu_config(
        profiling=False,
        profile_execution=False,
        max_cross_replica_sum_buffer_size=10000000,
        max_inter_ipu_copies_buffer_size=10000000)
    ipu_options = utils.set_recomputation_options(ipu_options, allow_recompute=True)
    ipu_options = utils.auto_select_ipus(ipu_options, [opts['replicas']])
    utils.configure_ipu_system(ipu_options)
    utils.reset_ipu_seed(opts['seed'])

    graph_outputs = [avg_loss, avg_aux_loss, avg_accuracy]
    sess = tf.Session(graph=graph)
    return GraphOps(sess, init, graph_outputs, placeholders, infeed,
                    saver, feed_dict_values), uid_embedding, mid_embedding, cat_embedding

def pipeline_on_ipu(stages, inputs_fn, input_values, repeat_count,
                    gradient_accumulation_count, dataset_fn, optimizer,
                    test_wrapper, expected_max_tile_memory, recomp, schedule,
                    device_mapping=None, batch_serialization_iterations=1):
    g = ops.Graph()
    with g.as_default(), test_wrapper.test_session(graph=g) as session:
        dataset = dataset_fn()
        inputs = inputs_fn()
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

        with variable_scope.variable_scope("ipu", use_resource=True, reuse=False):

            def optimizer_function(loss):
                return pipelining_ops.OptimizerFunctionOutput(optimizer, loss)

            def my_net(*args):
                return pipelining_ops.pipeline(
                    stages,
                    gradient_accumulation_count,
                    repeat_count=repeat_count,
                    batch_serialization_iterations=batch_serialization_iterations,
                    inputs=args,
                    optimizer_function=optimizer_function,
                    infeed_queue=infeed_queue,
                    outfeed_queue=outfeed_queue,
                    pipeline_schedule=schedule,
                    device_mapping=device_mapping)

            with ops.device("/device:IPU:0"):
                compiled_model_pipeline = ipu_compiler.compile(my_net, inputs=inputs)

        # Execution profiles of code with dynamic control flow are not
        # supported on real HW.
        profiling = utils.running_on_ipu_model()
        cfg = utils.create_ipu_config(profiling=profiling,
                                      profile_execution=profiling)
        cfg = utils.set_ipu_model_options(cfg,
                                          compile_ipu_code=True,
                                          tiles_per_ipu=128)
        num_ipus = get_num_ipus(device_mapping) if device_mapping else 4
        cfg = utils.auto_select_ipus(cfg, num_ipus)
        if recomp:
            cfg = utils.set_recomputation_options(cfg, allow_recompute=True)
        utils.configure_ipu_system(cfg)
        utils.move_variable_initialization_to_cpu()

        outfeed_op = outfeed_queue.dequeue()
        report = tu.ReportJSON(test_wrapper, session, configure_device=False)

        session.run(variables.global_variables_initializer())
        session.run(infeed_queue.initializer)
        report.reset()
        session.run(compiled_model_pipeline, feed_dict=dict(zip(inputs, input_values)))
        out = session.run(outfeed_op)[0]

        if profiling:
            report.parse_log()
            if not device_mapping:
                # Default mapping interleaves stages over IPUs 0, 1, 3, 2 in
                # each group of four.
                device_mapping = [
                    i - (i % 4) + ((i % 4) if (i % 4) < 2 else 5 - (i % 4))
                    for i in range(len(stages))
                ]
            report.assert_pipeline_stages_on_expected_ipu(device_mapping)
            report.assert_max_tile_memory(expected_max_tile_memory, tolerance=0.3)
        return out

def _sharded_on_ipu(stages, inputs_fn, input_values, repeat_count,
                    num_batches_to_accumulate, dataset_fn, optimizer,
                    test_wrapper, recomp, device_mapping):
    g = ops.Graph()
    with g.as_default(), test_wrapper.test_session(graph=g) as session:
        dataset = dataset_fn()
        inputs = inputs_fn()
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

        with variable_scope.variable_scope("ipu_sharded", use_resource=True,
                                           reuse=False):
            if device_mapping is None:
                device_mapping = range(len(stages))

            def pipeline(*args):
                outputs = args
                for i, stage in zip(device_mapping, stages):
                    with scopes.ipu_shard(i):
                        outputs = stage(*functional_ops._convert_to_list(outputs))  # pylint: disable=W0212
                loss = outputs
                enqueue_op = outfeed_queue.enqueue(loss)
                opt = gradient_accumulation_optimizer.GradientAccumulationOptimizer(
                    optimizer, num_batches_to_accumulate)
                outs = list(args[:len(args) - infeed_queue.number_of_tuple_elements])
                outs.append(enqueue_op)
                outs.append(opt.minimize(loss))
                return outs

            def my_net(*args):
                return loops.repeat(num_batches_to_accumulate,
                                    pipeline,
                                    inputs=args,
                                    infeed_queue=infeed_queue)

            with ops.device("/device:IPU:0"):
                compiled_model_pipeline = ipu_compiler.compile(my_net, inputs=inputs)

        outfeed_op = outfeed_queue.dequeue()

        # Execution profiles of code with dynamic control flow are not
        # supported on real HW.
        profiling = utils.running_on_ipu_model()
        cfg = utils.create_ipu_config(profiling=profiling,
                                      profile_execution=profiling)
        cfg = utils.set_ipu_model_options(cfg,
                                          compile_ipu_code=True,
                                          tiles_per_ipu=128)
        num_ipus = get_num_ipus(device_mapping) if device_mapping else 4
        cfg = utils.auto_select_ipus(cfg, num_ipus)
        if recomp:
            cfg = utils.set_recomputation_options(cfg, allow_recompute=True)
        utils.configure_ipu_system(cfg)
        utils.move_variable_initialization_to_cpu()

        session.run(variables.global_variables_initializer())
        session.run(infeed_queue.initializer)
        for _ in range(repeat_count):
            session.run(compiled_model_pipeline,
                        feed_dict=dict(zip(inputs, input_values)))
        return session.run(outfeed_op)