예제 #1
0
  def test_cacheables(self):
    # Verifies that instrumenting pipeline `p` reports one Cacheable for each
    # of the three PCollections that this test defines and watches.
    p = beam.Pipeline(interactive_runner.InteractiveRunner())
    ie.current_env().set_cache_manager(InMemoryCache(), p)
    # NOTE: the local names below are asserted as the Cacheable `var` values
    # (presumably derived from the watched locals), so do not rename them.
    # pylint: disable=range-builtin-not-iterating
    init_pcoll = p | 'Init Create' >> beam.Create(range(10))
    squares = init_pcoll | 'Square' >> beam.Map(lambda x: x * x)
    cubes = init_pcoll | 'Cube' >> beam.Map(lambda x: x**3)
    ib.watch(locals())

    pipeline_instrument = instr.build_pipeline_instrument(p)

    # TODO(BEAM-7760): The cacheables exposed by the PipelineInstrument may
    # contain PCollections that belong to other pipelines watched in the same
    # interactive environment. Restrict the comparison to the entries of this
    # test's pipeline so that unrelated state cannot break the assertion.
    cacheables = {
        key: cacheable
        for key, cacheable in pipeline_instrument.cacheables.items()
        if cacheable.pcoll.pipeline is p
    }

    self.assertEqual(
        cacheables,
        {
            pipeline_instrument._cacheable_key(init_pcoll): instr.Cacheable(
                var='init_pcoll',
                version=str(id(init_pcoll)),
                pcoll_id='ref_PCollection_PCollection_8',
                producer_version=str(id(init_pcoll.producer)),
                pcoll=init_pcoll),
            pipeline_instrument._cacheable_key(squares): instr.Cacheable(
                var='squares',
                version=str(id(squares)),
                pcoll_id='ref_PCollection_PCollection_9',
                producer_version=str(id(squares.producer)),
                pcoll=squares),
            pipeline_instrument._cacheable_key(cubes): instr.Cacheable(
                var='cubes',
                version=str(id(cubes)),
                pcoll_id='ref_PCollection_PCollection_10',
                producer_version=str(id(cubes.producer)),
                pcoll=cubes)
        })
예제 #2
0
  def test_cacheables(self):
    # Checks that instrumenting pipeline `p` yields exactly one Cacheable for
    # each of the three watched PCollections created by this test.
    p = beam.Pipeline(interactive_runner.InteractiveRunner())
    ie.current_env().set_cache_manager(InMemoryCache(), p)
    # NOTE: the local names below are asserted as the Cacheable `var` values
    # (presumably derived from the watched locals), so do not rename them.
    # pylint: disable=range-builtin-not-iterating
    init_pcoll = p | 'Init Create' >> beam.Create(range(10))
    squares = init_pcoll | 'Square' >> beam.Map(lambda x: x * x)
    cubes = init_pcoll | 'Cube' >> beam.Map(lambda x: x**3)
    ib.watch(locals())

    pipeline_instrument = instr.build_pipeline_instrument(p)

    # TODO(BEAM-7760): The PipelineInstrument cacheables maintains a global
    # list of cacheable PCollections across all pipelines, so keep only the
    # entries whose PCollection was created by this test's pipeline.
    own_cacheables = dict(
        (key, entry)
        for key, entry in pipeline_instrument.cacheables.items()
        if entry.pcoll.pipeline is p)

    def expected_item(var, pcoll, pcoll_id):
      # Returns the (key, Cacheable) pair expected for one watched PCollection.
      key = pipeline_instrument._cacheable_key(pcoll)
      cacheable = instr.Cacheable(
          var=var,
          version=str(id(pcoll)),
          pcoll_id=pcoll_id,
          producer_version=str(id(pcoll.producer)),
          pcoll=pcoll)
      return key, cacheable

    expected = dict([
        expected_item(
            'init_pcoll', init_pcoll, 'ref_PCollection_PCollection_8'),
        expected_item('squares', squares, 'ref_PCollection_PCollection_9'),
        expected_item('cubes', cubes, 'ref_PCollection_PCollection_10'),
    ])

    self.assertEqual(own_cacheables, expected)