def test_tempdir(name, clean=True):
    """Return the path `name` joined onto the system temporary directory.

    Args:
        name: Directory name (or relative sub-path) to place under
            `tempfile.gettempdir()`.
        clean: When true (the default), the path is handed to
            `oarphpy.util.cleandir` before it is returned.

    Returns:
        The joined path as a string.
    """
    import tempfile

    path = os.path.join(tempfile.gettempdir(), name)
    if clean:
        # Imported here rather than at module level, so the dependency is only
        # touched when cleaning is actually requested.
        from oarphpy.util import cleandir
        cleandir(path)
    return path


# This helper's name matches pytest's default `test_*` pattern but it takes
# required arguments.  Without this marker, importing it by name (or via a
# star-import) into a test module makes pytest collect it as a test and fail
# with "fixture 'name' not found".
test_tempdir.__test__ = False
def test_tf_records_file_as_list_of_str():
    """Write three byte strings to a TFRecords file and read them back
    through `util.TFRecordsFileAsListOfStrings`, checking length, contents
    and positional indexing."""
    TEST_TEMPDIR = testutil.test_tempdir(
        'test_tf_records_file_as_list_of_str')
    util.cleandir(TEST_TEMPDIR)

    # Create the fixture: simply three strings in the file.  A TFRecords file
    # is just a size-delimited concatenation of string records.
    ss = [b'foo', b'bar', b'bazzzz']
    fixture_path = os.path.join(TEST_TEMPDIR, 'test.tfrecord')
    with tf.io.TFRecordWriter(fixture_path) as writer:
        for s in ss:
            writer.write(s)

    # Test reading!  The file is kept open for the whole set of checks and is
    # closed deterministically afterwards instead of leaking the handle.
    with open(fixture_path, 'rb') as f:
        tf_lst = util.TFRecordsFileAsListOfStrings(f)
        assert len(tf_lst) == len(ss)
        assert sorted(tf_lst) == sorted(ss)
        for i, expected in enumerate(ss):
            assert tf_lst[i] == expected
def _create_new_egg(cls, src_root, out_dir):
    """Build an egg of the python module at `src_root` and write it to `out_dir`.

    Drives setuptools programmatically (no `setup.py` is needed); roughly
    equivalent to running `python setup.py clean bdist_egg`.

    Args:
        cls: Owner class.  Its `SRC_ROOT_MODULES` attribute selects what goes
            into the egg: `['*']` includes every package found, a non-empty
            list is treated as module-name prefixes, and an empty value
            includes only the module named after `src_root`.
        src_root: Existing path of the python module to package.
        out_dir: Existing directory that receives the egg; a `workdir`
            sub-directory is (re)created inside it for setuptools to use.

    Returns:
        Path of the generated egg file.

    Raises:
        AssertionError: If `src_root` or `out_dir` does not exist, or if the
            egg is not found at the expected path after the build.
    """
    assert os.path.exists(src_root)
    assert os.path.exists(out_dir)

    # Normalize both paths up front:
    #  * a trailing separator ("/opt/au/") would make basename() return ''
    #    (and dirname() return the module dir itself);
    #  * setuptools runs below inside `util.with_cwd(...)`, which presumably
    #    switches the working directory, so relative paths would no longer
    #    point at the intended locations.
    src_root = os.path.abspath(src_root)
    out_dir = os.path.abspath(out_dir)

    MODNAME = os.path.basename(src_root)

    if sys.version_info.major >= 3:
        # For whatever reason,
        # In py 2.7.x, setuptools wants the path of the python module
        # In py 3.x, setuptools wants the directory containing the python module
        src_root = os.path.dirname(src_root)
    util.log.info("Using source root %s " % src_root)

    # Below is a programmatic way to run something like:
    # $ cd /opt/au && python setup.py clean bdist_egg
    # But we don't actually need a setup.py (!)
    # Based upon https://github.com/pypa/setuptools/blob/a94ccbf404a79d56f9b171024dee361de9a948da/setuptools/tests/test_bdist_egg.py#L30
    # See also:
    # * https://github.com/pypa/setuptools/blob/f52b3b1c976e54df7a70db42bf59ca283412b461/setuptools/dist.py
    # * https://github.com/pypa/setuptools/blob/46af765c49f548523b8212f6e08e1edb12f22ab6/setuptools/tests/test_sdist.py#L123
    # * https://github.com/pypa/setuptools/blob/566f3aadfa112b8d6b9a1ecf5178552f6e0f8c6c/setuptools/__init__.py#L51
    from setuptools.dist import Distribution
    from setuptools import PackageFinder

    MODNAME = MODNAME.replace('-', '_')  # setuptools will do it anyways

    # By default we only want MODNAME in the egg, but we'll support
    # multiple modules (e.g. both oarphpy and oarphpy_test).
    include = [MODNAME + '*']
    if cls.SRC_ROOT_MODULES == ['*']:
        include = cls.SRC_ROOT_MODULES
    elif cls.SRC_ROOT_MODULES:
        include = [m + '*' for m in cls.SRC_ROOT_MODULES]

    # We want to confine setuptools to a clean directory because it'll create
    # stateful files and directories like `build/`
    setuptools_workdir = os.path.join(out_dir, 'workdir')
    util.cleandir(setuptools_workdir)

    dist = Distribution(attrs=dict(
        script_name='setup.py',
        script_args=[
            'clean',
            'bdist_egg',
            '--dist-dir', out_dir,
            '--bdist-dir', setuptools_workdir,
        ],
        name=MODNAME,
        src_root=src_root,
        packages=PackageFinder.find(where=src_root, include=include),
    ))

    util.log.info("Generating egg to %s ..." % out_dir)
    with util.with_cwd(setuptools_workdir):
        with util.quiet():
            dist.parse_command_line()
            dist.run_commands()

    # NB: invoking `setuptools.command.bdist_egg.bdist_egg(...).run()`
    # directly was tried as an alternative and didn't work so well.

    # No version is passed to Distribution above, hence the '-0.0.0' in the
    # expected file name.
    egg_path = os.path.join(out_dir, MODNAME + '-0.0.0' + _egg_py_suffix())
    assert os.path.exists(egg_path), "Can't find {}".format(egg_path)
    util.log.info("... done.  Egg at %s" % egg_path)
    return egg_path
res = S.for_each_executor(spark, test_mylib) assert res and all(res) print('prog.py success!') """ if __name__ == '__main__': import os import sys from oarphpy import util TEST_TEMPDIR_ROOT = '/tmp/test_spark_script_in_standalone_lib' # First create a clean dir and a custom python library my_lib_root = os.path.join(TEST_TEMPDIR_ROOT, 'my_lib_root') util.cleandir(my_lib_root) CREATE_LIB_SCRIPT = """ mkdir -p {src_root} && mkdir -p {src_root}/mylib && touch {src_root}/mylib/__init__.py """.format(src_root=my_lib_root) util.run_cmd(CREATE_LIB_SCRIPT) with open(os.path.join(my_lib_root, 'mylib', 'util.py'), 'w') as f: f.write(UTIL_PY_SRC) with open(os.path.join(my_lib_root, 'mylib', 'prog.py'), 'w') as f: f.write(PROG_PY_SRC) # Make sure the custom library works TEST_CMD = """
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. if __name__ == '__main__': import os import sys from oarphpy import util TEST_TEMPDIR_ROOT = '/tmp/test_spark_with_custom_library' # First create a clean dir and a custom python library my_src_root = os.path.join(TEST_TEMPDIR_ROOT, 'my_src_root') util.cleandir(my_src_root) CREATE_LIB_SCRIPT = """ mkdir -p {src_root} && mkdir -p {src_root}/mymodule && touch {src_root}/mymodule/__init__.py && echo "pi = 3.14" > {src_root}/mymodule/foo.py """.format(src_root=my_src_root) util.run_cmd(CREATE_LIB_SCRIPT) # Make sure the custom library works TEST_SCRIPT = """ cd {src_root} && {python} -c 'from mymodule.foo import pi; print(pi)' """.format(src_root=my_src_root, python=sys.executable) out = util.run_cmd(TEST_SCRIPT, collect=True)
def test_histogram_with_examples():
    """Render two `HistogramWithExamplesPlotter` figures from a small Spark
    DataFrame and compare PNG screenshots of them against stored fixtures.

    Skipped when `bokeh` or `numpy` is not importable.
    """
    pytest.importorskip('bokeh')
    np = pytest.importorskip('numpy')

    from oarphpy import util
    from oarphpy_test.testutil import get_fixture_path

    TEST_TEMPDIR = '/tmp/oarphpy/test_histogram_with_examples'
    util.cleandir(TEST_TEMPDIR)

    def png_path_for(html_path):
        # Swap only the file extension.  A plain str.replace('html', 'png')
        # would also rewrite any "html" occurring in a directory name.
        root, _ = os.path.splitext(html_path)
        return root + '.png'

    def check_fig(fig, fixture_name):
        """Save `fig` as HTML and PNG, then require the PNG to be
        pixel-identical to the fixture PNG named after `fixture_name`."""
        actual_path = os.path.join(TEST_TEMPDIR, 'actual_' + fixture_name)
        util.log.info("Saving actual plot to %s" % actual_path)
        pl.save_bokeh_fig(fig, actual_path, title=fixture_name)

        actual_png_path = png_path_for(actual_path)
        util.log.info("Saving screenshot of plot to %s" % actual_png_path)
        from bokeh.io import export_png
        export_png(fig, actual_png_path)

        expected_path = get_fixture_path(fixture_name)
        expected_png_path = png_path_for(expected_path)

        # Compare using PNGs because writing real selenium tests is too much
        # effort for the value at this time.  We tried comparing the raw HTML
        # but bokeh appears to write non-deterministically and/or includes
        # timestamped material.
        import imageio
        actual = imageio.imread(actual_png_path)
        expected = imageio.imread(expected_png_path)
        util.log.info('Comparing against expected at %s' % expected_png_path)

        np.testing.assert_array_equal(
            actual,
            expected,
            err_msg=(
                "Page mismatch, actual %s != expected %s, check HTML and PNGs" % (
                    actual_path, expected_path)))

    with LocalSpark.sess() as spark:
        # A simple table of x in [0, 100] with two derived columns:
        #   +------+------+---+
        #   |mod_11|square|  x|
        #   +------+------+---+
        #   |     0|     0|  0|
        #   |     1|     1|  1|
        #   |     2|     4|  2|
        #   |     3|     9|  3|
        #   ...
        #   +------+------+---+
        from pyspark.sql import Row
        df = spark.createDataFrame([
            Row(x=x, mod_11=int(x % 11), square=x*x)
            for x in range(101)
        ])

        ### Check basic plotting
        plotter = pl.HistogramWithExamplesPlotter()
        fig = plotter.run(df, 'x')
        check_fig(fig, 'test_histogram_with_examples_1.html')

        ### Check plotting with custom example plotter
        class PlotterWithMicroFacet(pl.HistogramWithExamplesPlotter):
            SUB_PIVOT_COL = 'mod_11'
            NUM_BINS = 25

            def display_bucket(self, sub_pivot, bucket_id, irows):
                # One "<br />"-separated line per row, ordered by x.
                rows_str = "<br />".join(
                    "x: {x} square: {square} mod_11: {mod_11}".format(**row.asDict())
                    for row in sorted(irows, key=lambda r: r.x))
                TEMPLATE = """ <b>Pivot: {spv} Bucket: {bucket_id} </b> <br/> {rows} <br/> <br/> """
                disp = TEMPLATE.format(
                    spv=sub_pivot,
                    bucket_id=bucket_id,
                    rows=rows_str)
                return bucket_id, disp

        plotter = PlotterWithMicroFacet()
        fig = plotter.run(df, 'square')
        check_fig(fig, 'test_histogram_with_examples_2.html')