def test_cli_datasets_get(self):
    """Verify `datasets get` writes a stored dataset to a local csv file.

    Covers the default csv output as well as passing pandas csv options
    via repeated --csv key=value arguments (custom separator and a
    column subset).
    """
    # write a csv, read back
    fn = temp_filename(ext='csv')
    self.make_dataset_from_dataframe('test')
    self.cli(f'datasets get test {fn}')
    df = pd.read_csv(fn)
    expected = self.om.datasets.get('test')
    assert_frame_equal(df, expected)
    # write a csv, read back with options
    fn = temp_filename(ext='csv')
    self.make_dataset_from_dataframe('test-sep')
    self.cli(f'datasets get test-sep {fn} --csv sep=; --csv columns=x,')
    df = pd.read_csv(fn, sep=';')
    # fix: compare against the dataset that was actually exported
    # ('test-sep'), not the 'test' dataset from the first scenario
    expected = self.om.datasets.get('test-sep')[['x']]
    assert_frame_equal(expected, df)
def _fit_tpu(self, modelname, Xname, Yname=None, tpu_specs=None, **kwargs):
    """Fit a stored Keras model on a TPU device (TensorFlow 1.x only).

    The TPU address is taken from tpu_specs or, if not given, from the
    COLAB_TPU_ADDR environment variable. The model is converted to a TPU
    model, fitted on the datasets named by Xname/Yname, and the trained
    weights are copied back into the original model before it is saved.

    Args:
        modelname (str): name of the model in the model store
        Xname (str): name of the X dataset in the data store
        Yname (str): name of the Y dataset in the data store
        tpu_specs (str): TPU address, with or without grpc:// prefix
        **kwargs: unused

    Returns:
        Metadata of the saved, fitted model
    """
    import tensorflow as tf
    # approach adopted from
    # https://www.dlology.com/blog/how-to-train-keras-model-x20-times-faster-with-tpu-for-free/
    # FIXME this will fail in tf 2.0, see
    # https://github.com/tensorflow/tensorflow/issues/24412#issuecomment-491980177
    assert tf.__version__.startswith('1.'), "TPU only supported on tf < 2.0"
    device = tpu_specs or os.environ.get('COLAB_TPU_ADDR', '')
    assert device, "there is no TPU device"
    # normalize the address into a grpc:// worker url
    worker = device if device.startswith('grpc://') else 'grpc://' + device
    tf.logging.set_verbosity(tf.logging.INFO)
    model = self.get_model(modelname)
    resolver = tf.contrib.cluster_resolver.TPUClusterResolver(worker)
    strategy = tf.contrib.tpu.TPUDistributionStrategy(resolver)
    tpu_model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)
    X = self.data_store.get(Xname)
    Y = self.data_store.get(Yname)
    tpu_model.fit(X, Y)
    # transfer the tpu-trained weights back into the cpu-side model,
    # then persist the cpu-side model
    weights_fn = temp_filename()
    tpu_model.save_weights(weights_fn, overwrite=True)
    model.load_weights(weights_fn)
    meta = self.put(model, modelname)
    return meta
def test_cli_jobs_get(self):
    """Verify `jobs get` exports a stored job notebook to a local file."""
    self.create_job('test')
    outfn = temp_filename(ext='ipynb')
    self.cli(f'jobs get test {outfn}')
    self.assertTrue(os.path.exists(outfn))
    # the exported file must be a readable v4 notebook containing
    # the cell created by create_job
    with open(outfn, 'r') as fin:
        notebook = nbformat.read(fin, as_version=4)
    self.assertIn('cells', notebook)
    self.assertIn('hello', notebook['cells'][0]['source'])
def test_cli_jobs_put(self):
    """Verify `jobs put` stores a local notebook file as a job."""
    self.assertNotIn('test', self.om.jobs.list())
    # build a minimal one-cell notebook and write it to a temp file
    code = "print 'hello'"
    cell = nbformat.v4.new_code_cell(source=code)
    notebook = nbformat.v4.new_notebook(cells=[cell])
    fn = temp_filename(ext='ipynb')
    with open(fn, 'w') as fout:
        nbformat.write(notebook, fout, version=4)
    # store the notebook as a job and check metadata was logged
    self.cli(f'jobs put {fn} test')
    self.assertLogContains('info', 'Metadata')
    self.assertLogContains('info', 'name=test')
    self.assertIn('test.ipynb', self.om.jobs.list())
def r_source(r_code):
    """Source the given R code in the reticulated R session.

    The code is written to a temporary .R file, sourced via the R
    helper, and the temporary file is removed afterwards.

    Args:
        r_code (str): the string of R code, as entered in REPL or R file

    Returns:
        the result of sourcing the file in the R session
    """
    r = rhelper()
    fn = temp_filename(ext='R')
    with open(fn, 'w') as fout:
        fout.write(r_code)
    try:
        result = r.source(fn)
    finally:
        # fix: remove the temp file even if sourcing raises,
        # previously the file leaked on error
        remove_temp_filename(fn)
    return result
def test_cli_datasets_put(self):
    """Verify `datasets put` stores local files as datasets.

    Covers csv files (default and custom separator, stored as
    pandas.dfrows), arbitrary binary files (stored as python.file)
    and image files (stored as ndarray.bin).
    """
    # write a csv, read back
    fn = temp_filename(ext='csv')
    df = self.create_local_csv(fn)
    self.cli(f'datasets put {fn} foobar')
    self.assertLogContains('info', 'Metadata(')
    self.assertLogContains('info', 'kind=pandas.dfrows')
    dfx = self.om.datasets.get('foobar')
    assert_frame_equal(df, dfx)
    # write a csv, read back, use custom sep
    fn = temp_filename(ext='csv')
    df = self.create_local_csv(fn, sep=';')
    self.cli(f'datasets put {fn} foxbaz --csv sep=;')
    self.assertLogContains('info', 'Metadata(')
    self.assertLogContains('info', 'kind=pandas.dfrows')
    dfx = self.om.datasets.get('foxbaz')
    assert_frame_equal(df, dfx)
    # write some other file
    fn = temp_filename(ext='binary')
    df = self.create_local_csv(fn)
    self.cli(f'datasets put {fn} binary', new_start=True)
    # fix: removed leftover debug output (print of the info log)
    self.assertLogContains('info', 'Metadata(')
    self.assertLogContains('info', 'kind=python.file')
    with open(fn, 'rb') as fin:
        expected = fin.read()
    data = self.om.datasets.get('binary').read()
    self.assertEqual(expected, data)
    # write image file
    fn = temp_filename(ext='jpg')
    img = self.create_local_image_file(fn)
    self.cli(f'datasets put {fn} image', new_start=True)
    self.assertLogContains('info', 'Metadata(')
    self.assertLogContains('info', 'kind=ndarray.bin')
    data = self.om.datasets.get('image')
    assert_array_equal(data, img)
def put_model(self, obj, name, attributes=None, **kwargs):
    """Store a model object in gridfs and return its saved Metadata.

    The model is serialized to a temporary file via _save_model, the
    file is uploaded to the model store's gridfs, and a Metadata
    document pointing at the stored file is created and saved.

    Args:
        obj: the model object to store
        name (str): the name to store the model under
        attributes (dict): optional attributes to record in the Metadata
        **kwargs: unused

    Returns:
        the saved Metadata
    """
    fn = temp_filename()
    try:
        self._save_model(obj, fn)
        with open(fn, mode='rb') as fin:
            fileid = self.model_store.fs.put(
                fin, filename=self.model_store._get_obj_store_key(name, 'h5'))
    finally:
        # fix: remove the temp file even if serialization or the
        # gridfs upload raises, previously the file leaked on error
        remove_temp_filename(fn)
    gridfile = GridFSProxy(grid_id=fileid,
                           db_alias='omega',
                           collection_name=self.model_store.bucket)
    return self.model_store._make_metadata(name=name,
                                           prefix=self.model_store.prefix,
                                           bucket=self.model_store.bucket,
                                           kind=self.KIND,
                                           attributes=attributes,
                                           gridfile=gridfile).save()