Beispiel #1
0
def test_tempdir(name, clean=True):
  import tempfile
  path = os.path.join(tempfile.gettempdir(), name)

  if clean:
    from oarphpy.util import cleandir
    cleandir(path)
  
  return path
Beispiel #2
0
def test_tf_records_file_as_list_of_str():
  """Write three byte strings to a TFRecords file and check that
  `util.TFRecordsFileAsListOfStrings` exposes them through `len()`,
  iteration and indexing.
  """
  TEST_TEMPDIR = testutil.test_tempdir(
                      'test_tf_records_file_as_list_of_str')
  util.cleandir(TEST_TEMPDIR)
  
  # Create the fixture: simply three strings in the file.  A TFRecords file
  # is just a size-delimited concatenation of string records.
  ss = [b'foo', b'bar', b'bazzzz']
  fixture_path = os.path.join(TEST_TEMPDIR, 'test.tfrecord')

  with tf.io.TFRecordWriter(fixture_path) as writer:
    for s in ss:
      writer.write(s)
  
  # Test reading!  The file handle stays open for every access below (the
  # wrapper may read lazily) and is closed deterministically afterwards
  # instead of being leaked.
  with open(fixture_path, 'rb') as fixture_file:
    tf_lst = util.TFRecordsFileAsListOfStrings(fixture_file)
    assert len(tf_lst) == len(ss)
    assert sorted(tf_lst) == sorted(ss)
    # Index explicitly so that random access is exercised, not just iteration.
    for i, expected in enumerate(ss):
      assert tf_lst[i] == expected
Beispiel #3
0
    def _create_new_egg(cls, src_root, out_dir):
        """Build a Python egg for the module at `src_root` and return the
        path of the generated egg file.

        Args:
            src_root: existing path of the python module to package.
            out_dir: existing directory that receives the egg; a `workdir`
                subdirectory of it is used as setuptools' scratch space.

        Returns:
            Path of the egg inside `out_dir`.

        The packages placed in the egg are selected through
        `cls.SRC_ROOT_MODULES`: `['*']` is forwarded unchanged, any other
        non-empty list has `'*'` appended to each entry, and an empty value
        falls back to the module named after `src_root`.
        """
        for required_path in (src_root, out_dir):
            assert os.path.exists(required_path)

        # setuptools normalizes '-' to '_' anyway, so do it up front.  The
        # name must be taken before `src_root` is possibly rewritten below.
        mod_name = os.path.basename(src_root).replace('-', '_')

        # For whatever reason:
        #  * in py 2.7.x setuptools wants the path of the python module
        #  * in py 3.x setuptools wants the directory containing the module
        on_py3 = sys.version_info.major >= 3
        if on_py3:
            src_root = os.path.dirname(src_root)

        util.log.info("Using source root %s " % src_root)

        # What follows is the programmatic equivalent of
        #   $ cd /opt/au && python setup.py clean bdist_egg
        # except that no setup.py is actually needed.  Based upon
        # https://github.com/pypa/setuptools/blob/a94ccbf404a79d56f9b171024dee361de9a948da/setuptools/tests/test_bdist_egg.py#L30
        # See also:
        # * https://github.com/pypa/setuptools/blob/f52b3b1c976e54df7a70db42bf59ca283412b461/setuptools/dist.py
        # * https://github.com/pypa/setuptools/blob/46af765c49f548523b8212f6e08e1edb12f22ab6/setuptools/tests/test_sdist.py#L123
        # * https://github.com/pypa/setuptools/blob/566f3aadfa112b8d6b9a1ecf5178552f6e0f8c6c/setuptools/__init__.py#L51
        from setuptools.dist import Distribution
        from setuptools import PackageFinder

        # By default only `mod_name` goes into the egg, but several modules
        # are supported as well (e.g. both oarphpy and oarphpy_test).
        requested = cls.SRC_ROOT_MODULES
        if requested == ['*']:
            patterns = requested
        elif requested:
            patterns = [module + '*' for module in requested]
        else:
            patterns = [mod_name + '*']

        # Confine setuptools to a clean directory: it leaves stateful files
        # and directories such as `build/` behind.
        workdir = os.path.join(out_dir, 'workdir')
        util.cleandir(workdir)

        cli_args = [
            'clean',
            'bdist_egg',
            '--dist-dir', out_dir,
            '--bdist-dir', workdir,
        ]
        dist_attrs = {
            'script_name': 'setup.py',
            'script_args': cli_args,
            'name': mod_name,
            'src_root': src_root,
            'packages': PackageFinder.find(where=src_root, include=patterns),
        }
        dist = Distribution(attrs=dist_attrs)

        util.log.info("Generating egg to %s ..." % out_dir)
        with util.with_cwd(workdir), util.quiet():
            dist.parse_command_line()
            dist.run_commands()

        # NB: driving `setuptools.command.bdist_egg.bdist_egg` directly
        # (with `bdist_dir=...` and `editable=True`) was tried and did not
        # work so well, hence the Distribution-based approach above.

        # No version is given to Distribution; the egg is expected to be
        # named with '0.0.0' (presumably the setuptools default).
        egg_name = mod_name + '-0.0.0' + _egg_py_suffix()
        egg_path = os.path.join(out_dir, egg_name)
        assert os.path.exists(egg_path), "Can't find {}".format(egg_path)
        util.log.info("... done.  Egg at %s" % egg_path)
        return egg_path
    res = S.for_each_executor(spark, test_mylib)
    assert res and all(res)
  print('prog.py success!')
"""

# Script entry point: lays out a throwaway python package named `mylib` under
# /tmp and writes two source files into it.
if __name__ == '__main__':
    import os
    import sys

    from oarphpy import util

    # Everything this script creates lives below this directory.
    TEST_TEMPDIR_ROOT = '/tmp/test_spark_script_in_standalone_lib'

    # First create a clean dir and a custom python library
    my_lib_root = os.path.join(TEST_TEMPDIR_ROOT, 'my_lib_root')
    util.cleandir(my_lib_root)

    # Shell snippet that creates the `mylib` package directory together with
    # an empty `__init__.py`.
    CREATE_LIB_SCRIPT = """
    mkdir -p {src_root} &&
    mkdir -p {src_root}/mylib &&
    touch {src_root}/mylib/__init__.py
  """.format(src_root=my_lib_root)
    util.run_cmd(CREATE_LIB_SCRIPT)

    # Populate the package with its two modules.  UTIL_PY_SRC and PROG_PY_SRC
    # are defined outside this section -- presumably the module-level source
    # strings earlier in the file; TODO confirm.
    with open(os.path.join(my_lib_root, 'mylib', 'util.py'), 'w') as f:
        f.write(UTIL_PY_SRC)
    with open(os.path.join(my_lib_root, 'mylib', 'prog.py'), 'w') as f:
        f.write(PROG_PY_SRC)

    # Make sure the custom library works
    TEST_CMD = """
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Script entry point: builds a throwaway python package named `mymodule` under
# /tmp and runs a one-liner that imports from it.
if __name__ == '__main__':
    import os
    import sys

    from oarphpy import util

    # Everything this script creates lives below this directory.
    TEST_TEMPDIR_ROOT = '/tmp/test_spark_with_custom_library'

    # First create a clean dir and a custom python library
    my_src_root = os.path.join(TEST_TEMPDIR_ROOT, 'my_src_root')
    util.cleandir(my_src_root)

    # Shell snippet that creates the `mymodule` package (empty `__init__.py`)
    # plus a `foo.py` whose only content is `pi = 3.14`.
    CREATE_LIB_SCRIPT = """
    mkdir -p {src_root} &&
    mkdir -p {src_root}/mymodule &&
    touch {src_root}/mymodule/__init__.py &&
    echo "pi = 3.14" > {src_root}/mymodule/foo.py
  """.format(src_root=my_src_root)
    util.run_cmd(CREATE_LIB_SCRIPT)

    # Make sure the custom library works
    # The import is run with the current interpreter (`sys.executable`) from
    # inside the source root so that `mymodule` is importable.
    TEST_SCRIPT = """
    cd {src_root} &&
    {python} -c 'from mymodule.foo import pi; print(pi)'
  """.format(src_root=my_src_root, python=sys.executable)
    # NOTE(review): `collect=True` presumably makes run_cmd return the
    # command's output -- confirm against oarphpy.util.run_cmd.
    out = util.run_cmd(TEST_SCRIPT, collect=True)
Beispiel #6
0
def test_histogram_with_examples():
  """Render two `HistogramWithExamplesPlotter` figures from a small Spark
  DataFrame and compare PNG screenshots of them against stored fixtures.

  The test is skipped when `bokeh` or `numpy` cannot be imported.
  """
  pytest.importorskip('bokeh')
  np = pytest.importorskip('numpy')

  from oarphpy import util
  from oarphpy_test.testutil import get_fixture_path

  TEST_TEMPDIR = '/tmp/oarphpy/test_histogram_with_examples'
  util.cleandir(TEST_TEMPDIR)

  def png_path_for(html_path):
    """Return `html_path` with its file extension swapped for `.png`."""
    # Only the extension is replaced.  A plain `str.replace('html', 'png')`
    # would also rewrite any other "html" occurring in the path, e.g. in the
    # name of the fixture directory.
    stem, _ = os.path.splitext(html_path)
    return stem + '.png'
  
  def check_fig(fig, fixture_name):
    """Save `fig` as HTML and PNG under TEST_TEMPDIR and assert that the PNG
    is pixel-identical to the PNG stored next to fixture `fixture_name`."""
    actual_path = os.path.join(TEST_TEMPDIR, 'actual_' + fixture_name)
    util.log.info("Saving actual plot to %s" % actual_path)
    pl.save_bokeh_fig(fig, actual_path, title=fixture_name)
    
    actual_png_path = png_path_for(actual_path)
    util.log.info("Saving screenshot of plot to %s" % actual_png_path)
    from bokeh.io import export_png
    export_png(fig, actual_png_path)

    expected_path = get_fixture_path(fixture_name)
    expected_png_path = png_path_for(expected_path)

    # Compare using PNGs because writing real selenium tests is too much effort
    # for the value at this time.  We tried comparing the raw HTML but bokeh
    # appears to write non-deterministically and/or includes timestamped
    # material.
    import imageio
    actual = imageio.imread(actual_png_path)
    expected = imageio.imread(expected_png_path)
    util.log.info('Comparing against expected at %s' % expected_png_path)

    np.testing.assert_array_equal(
      actual, expected,
      err_msg=(
        "Page mismatch, actual %s != expected %s, check HTML and PNGs" % (
          actual_path, expected_path)))

  with LocalSpark.sess() as spark:
    
    # A simple table:
    # +------+------+---+
    # |mod_11|square|  x|
    # +------+------+---+
    # |     0|     0|  0|
    # |     1|     1|  1|
    # |     2|     4|  2|
    # |     3|     9|  3|
    #    ...
    # +------+------+---+
    from pyspark.sql import Row
    df = spark.createDataFrame([
      Row(x=x, mod_11=int(x % 11), square=x*x)
      for x in range(101)
    ])

    ### Check basic plotting
    plotter = pl.HistogramWithExamplesPlotter()
    fig = plotter.run(df, 'x')
    check_fig(fig, 'test_histogram_with_examples_1.html')

    ### Check plotting with custom example plotter
    class PlotterWithMicroFacet(pl.HistogramWithExamplesPlotter):
      SUB_PIVOT_COL = 'mod_11'
      NUM_BINS = 25

      def display_bucket(self, sub_pivot, bucket_id, irows):
        # Rows are sorted by `x` so that the rendered HTML does not depend on
        # the order in which the rows arrive.
        rows_str = "<br />".join(
            "x: {x} square: {square} mod_11: {mod_11}".format(**row.asDict())
            for row in sorted(irows, key=lambda r: r.x))
        TEMPLATE = """
          <b>Pivot: {spv} Bucket: {bucket_id} </b> <br/>
          {rows}
          <br/> <br/>
        """
        disp = TEMPLATE.format(
                  spv=sub_pivot,
                  bucket_id=bucket_id,
                  rows=rows_str)
        return bucket_id, disp
    
    plotter = PlotterWithMicroFacet()
    fig = plotter.run(df, 'square')
    check_fig(fig, 'test_histogram_with_examples_2.html')