Example #1
0
 def test_cli_datasets_get(self):
     """datasets get: export a stored dataset to a local csv and read it back"""
     # write a csv, read back
     fn = temp_filename(ext='csv')
     self.make_dataset_from_dataframe('test')
     self.cli(f'datasets get test {fn}')
     df = pd.read_csv(fn)
     expected = self.om.datasets.get('test')
     assert_frame_equal(df, expected)
     # write a csv, read back with options (custom separator, column subset)
     fn = temp_filename(ext='csv')
     self.make_dataset_from_dataframe('test-sep')
     self.cli(f'datasets get test-sep {fn} --csv sep=; --csv columns=x,')
     df = pd.read_csv(fn, sep=';')
     # fix: compare against the dataset that was actually exported
     # ('test-sep'), not the unrelated 'test' dataset
     expected = self.om.datasets.get('test-sep')[['x']]
     assert_frame_equal(expected, df)
Example #2
0
 def _fit_tpu(self, modelname, Xname, Yname=None, tpu_specs=None, **kwargs):
     """fit a keras model on a TPU device (tensorflow 1.x only)

     Converts the stored keras model to a TPU model, fits it on the
     datasets given by Xname/Yname, then copies the trained weights back
     into the original (CPU) model and stores it.

     Args:
         modelname (str): name of the stored model to fit
         Xname (str): name of the X dataset in the data store
         Yname (str): name of the Y dataset in the data store, optional
         tpu_specs (str): TPU address; defaults to $COLAB_TPU_ADDR

     Returns:
         Metadata of the stored, fitted model
     """
     import tensorflow as tf
     # adopted from https://www.dlology.com/blog/how-to-train-keras-model-x20-times-faster-with-tpu-for-free/
     # This address identifies the TPU we'll use when configuring TensorFlow.
     # FIXME this will fail in tf 2.0, see https://github.com/tensorflow/tensorflow/issues/24412#issuecomment-491980177
     assert tf.__version__.startswith('1.'), "TPU only supported on tf < 2.0"
     tpu_device = tpu_specs or os.environ.get('COLAB_TPU_ADDR', '')
     assert tpu_device, "there is no TPU device"
     if tpu_device.startswith('grpc://'):
         tpu_worker = tpu_device
     else:
         tpu_worker = 'grpc://' + tpu_device
     tf.logging.set_verbosity(tf.logging.INFO)
     model = self.get_model(modelname)
     tpu_model = tf.contrib.tpu.keras_to_tpu_model(
         model,
         strategy=tf.contrib.tpu.TPUDistributionStrategy(
             tf.contrib.cluster_resolver.TPUClusterResolver(tpu_worker)))
     X = self.data_store.get(Xname)
     Y = self.data_store.get(Yname)
     tpu_model.fit(X, Y)
     # transfer the TPU-trained weights back to the CPU model via a temp
     # file; always remove the temp file, even if loading fails
     fn = temp_filename()
     try:
         tpu_model.save_weights(fn, overwrite=True)
         model.load_weights(fn)
     finally:
         try:
             os.remove(fn)
         except OSError:
             # best-effort cleanup; the file may not have been created
             pass
     meta = self.put(model, modelname)
     return meta
Example #3
0
 def test_cli_jobs_get(self):
     """jobs get: retrieve a stored job as a local .ipynb notebook file"""
     self.create_job('test')
     outfile = temp_filename(ext='ipynb')
     self.cli(f'jobs get test {outfile}')
     # the cli must have written an actual notebook file
     self.assertTrue(os.path.exists(outfile))
     with open(outfile, 'r') as nbfile:
         notebook = nbformat.read(nbfile, as_version=4)
     # the notebook must contain the cell created by create_job
     self.assertIn('cells', notebook)
     self.assertIn('hello', notebook['cells'][0]['source'])
Example #4
0
 def test_cli_jobs_put(self):
     """jobs put: store a local notebook file as a job"""
     self.assertNotIn('test', self.om.jobs.list())
     # build a minimal one-cell notebook to upload
     # fix: use python 3 print syntax so the notebook cell is actually
     # runnable (the original "print 'hello'" is python 2 only)
     code = "print('hello')"
     cells = [nbformat.v4.new_code_cell(source=code)]
     notebook = nbformat.v4.new_notebook(cells=cells)
     fn = temp_filename(ext='ipynb')
     with open(fn, 'w') as fout:
         nbformat.write(notebook, fout, version=4)
     self.cli(f'jobs put {fn} test')
     # the cli reports the stored Metadata on the info log
     self.assertLogContains('info', 'Metadata')
     self.assertLogContains('info', 'name=test')
     self.assertIn('test.ipynb', self.om.jobs.list())
Example #5
0
def r_source(r_code):
    """source the given R code in the reticulated R session

    Writes r_code to a temporary .R file, sources it via the R helper
    session and removes the temporary file again.

    Args:
        r_code (str): the string of R code, as entered in REPL or R file

    Returns:
        the result of sourcing the code in the R session
    """
    r = rhelper()
    fn = temp_filename(ext='R')
    with open(fn, 'w') as fout:
        fout.write(r_code)
    try:
        result = r.source(fn)
    finally:
        # always clean up the temp file, even if sourcing raises
        remove_temp_filename(fn)
    return result
Example #6
0
 def test_cli_datasets_put(self):
     """datasets put: store local files of various kinds as datasets"""
     # write a csv, read back
     fn = temp_filename(ext='csv')
     df = self.create_local_csv(fn)
     self.cli(f'datasets put {fn} foobar')
     self.assertLogContains('info', 'Metadata(')
     self.assertLogContains('info', 'kind=pandas.dfrows')
     dfx = self.om.datasets.get('foobar')
     assert_frame_equal(df, dfx)
     # write a csv, read back, use custom sep
     fn = temp_filename(ext='csv')
     df = self.create_local_csv(fn, sep=';')
     self.cli(f'datasets put {fn} foxbaz --csv sep=;')
     self.assertLogContains('info', 'Metadata(')
     self.assertLogContains('info', 'kind=pandas.dfrows')
     dfx = self.om.datasets.get('foxbaz')
     assert_frame_equal(df, dfx)
     # write some other file -- stored verbatim as python.file
     fn = temp_filename(ext='binary')
     df = self.create_local_csv(fn)
     self.cli(f'datasets put {fn} binary', new_start=True)
     # fix: removed leftover debug print(self.get_log('info'))
     self.assertLogContains('info', 'Metadata(')
     self.assertLogContains('info', 'kind=python.file')
     with open(fn, 'rb') as fin:
         expected = fin.read()
     data = self.om.datasets.get('binary').read()
     self.assertEqual(expected, data)
     # write image file -- stored as a binary ndarray
     fn = temp_filename(ext='jpg')
     img = self.create_local_image_file(fn)
     self.cli(f'datasets put {fn} image', new_start=True)
     self.assertLogContains('info', 'Metadata(')
     self.assertLogContains('info', 'kind=ndarray.bin')
     data = self.om.datasets.get('image')
     assert_array_equal(data, img)
Example #7
0
 def put_model(self, obj, name, attributes=None, **kwargs):
     """store a model in the model store as a gridfs-backed file

     Serializes the model to a temporary file, uploads it to gridfs
     under the store's object key, and saves the resulting Metadata.

     Args:
         obj: the model object to store
         name (str): the name to store the model under
         attributes (dict): optional attributes to attach to the Metadata

     Returns:
         the saved Metadata of the stored model
     """
     fn = temp_filename()
     try:
         self._save_model(obj, fn)
         with open(fn, mode='rb') as fin:
             fileid = self.model_store.fs.put(
                 fin, filename=self.model_store._get_obj_store_key(name, 'h5'))
             gridfile = GridFSProxy(grid_id=fileid,
                                    db_alias='omega',
                                    collection_name=self.model_store.bucket)
     finally:
         # always remove the temp file, even if serialization or the
         # gridfs upload raises (previously leaked on error)
         remove_temp_filename(fn)
     return self.model_store._make_metadata(name=name,
                                            prefix=self.model_store.prefix,
                                            bucket=self.model_store.bucket,
                                            kind=self.KIND,
                                            attributes=attributes,
                                            gridfile=gridfile).save()