def test_create_from_string_content(self):
    """Check File.from_content for str input, with and without an explicit extension."""
    text = "some_content"

    # No extension given: the test expects "txt" and UTF-8 encoded content.
    default_file = File.from_content(text)
    self.assertEqual(None, default_file.path)
    self.assertEqual(text.encode("utf-8"), default_file.content)
    self.assertEqual("txt", default_file.extension)

    # Explicit extension is expected to be kept as given.
    png_file = File.from_content(text, extension="png")
    self.assertEqual(None, png_file.path)
    self.assertEqual(text.encode("utf-8"), png_file.content)
    self.assertEqual("png", png_file.extension)
def test_create_from_bytes_content(self):
    """Check File.from_content for bytes input, with and without an explicit extension."""
    payload = b"some_content"

    # No extension given: the test expects "bin" and the bytes unchanged.
    default_file = File.from_content(payload)
    self.assertEqual(None, default_file.path)
    self.assertEqual(payload, default_file.content)
    self.assertEqual("bin", default_file.extension)

    # Explicit extension is expected to be kept as given.
    png_file = File.from_content(payload, extension="png")
    self.assertEqual(None, png_file.path)
    self.assertEqual(payload, png_file.content)
    self.assertEqual("png", png_file.extension)
def log(self, key, v):
    """Log a copy of ``v`` under ``self.run[key]``, converting numpy arrays first.

    Array handling depends on the number of dimensions:
      * 0 -> converted to a Python float
      * 1 -> wrapped into a single-row 2-D array and converted with File.as_image
      * 2 or 3 -> converted with File.as_image
      * more than 3 -> nothing is logged
    Values that are not numpy arrays are logged as they are (after the copy).
    """
    value = deepcopy(v)
    if isinstance(value, np.ndarray):
        rank = len(value.shape)
        if rank > 3:
            # No conversion exists for this many dimensions; skip silently.
            return
        if rank == 0:
            value = float(value)
        elif rank == 1:
            value = File.as_image(np.array([value]))
        else:
            value = File.as_image(value)
    self.run[key].log(value)
def test_log_many_values(self):
    """Log whole lists of numbers, strings and images in single calls and check the results."""
    run = init(mode="debug", flush_period=0.5)

    run["some/num/val"].log([5, 10, 15])
    run["some/str/val"].log(["some text", "other"])
    red_images = [
        FileVal.as_image(PIL.Image.new("RGB", size, color="red"))
        for size in ((60, 30), (20, 90))
    ]
    run["some/img/val"].log(red_images)

    # The last element of each logged list must be the last value of the series.
    self.assertEqual(run["some"]["num"]["val"].fetch_last(), 15)
    self.assertEqual(run["some"]["str"]["val"].fetch_last(), "other")
    image_attribute = run.get_structure()["some"]["img"]["val"]
    self.assertIsInstance(image_attribute, FileSeries)
def test_assign_content(self):
    """Assigning one in-memory image must enqueue a clear followed by a single image log."""
    # given
    wait = self._random_wait()
    path = self._random_path()
    processor = MagicMock()
    run = self._create_run(processor=processor)
    series = FileSeries(run, path)
    image_file = File.as_image(numpy.random.rand(10, 10) * 255)

    # when
    series.assign([image_file], wait=wait)

    # then
    logged_value = LogImages.ValueType(
        ImageValue(base64_encode(image_file.content), None, None),
        None,
        self._now(),
    )
    expected_calls = [
        call(ClearImageLog(path), False),
        call(LogImages(path, [logged_value]), wait),
    ]
    processor.enqueue_operation.assert_has_calls(expected_calls)
def test_log_content(self):
    """Logging one in-memory image with step, timestamp, name and description enqueues one LogImages."""
    # given
    wait = self._random_wait()
    path = self._random_path()
    processor = MagicMock()
    run = self._create_run(processor=processor)
    series = FileSeries(run, path)
    image_file = File.as_image(numpy.random.rand(10, 10) * 255)

    # when
    timestamp = self._now()
    series.log(
        image_file,
        step=3,
        timestamp=timestamp,
        wait=wait,
        name="nazwa",
        description="opis",
    )

    # then
    logged_value = LogImages.ValueType(
        ImageValue(base64_encode(image_file.content), "nazwa", "opis"),
        3,
        self._now(),
    )
    expected_operation = LogImages(path, [logged_value])
    processor.enqueue_operation.assert_called_once_with(expected_operation, wait)
def test_assign_raise_not_image(self):
    """Assigning non-image files (in memory or on disk) must raise OperationNotSupported."""
    # given
    path = self._random_path()
    processor = MagicMock()
    run = self._create_run(processor=processor)
    series = FileSeries(run, path)
    text_file = File.from_content("some text")

    with create_file(text_file.content, binary_mode=True) as tmp_filename:
        file_on_disk = File(tmp_filename)

        # when / then: the temporary file must still exist while assigning.
        for candidate in (text_file, file_on_disk):
            with self.assertRaises(OperationNotSupported):
                series.assign([candidate])
def test_log_value_errors(self):
    """Exercise the error paths of ``log`` for fresh and already-typed series attributes."""
    run = init(mode="debug", flush_period=0.5)
    image = FileVal.as_image(PIL.Image.new("RGB", (60, 30), color="red"))

    # On a not-yet-created attribute: empty list, mixed types, and
    # several values combined with a single step are all rejected.
    rejected_on_new_attribute = (
        ([], {}),
        ([5, "str"], {}),
        ([5, 10], {"step": 10}),
    )
    for values, extra in rejected_on_new_attribute:
        with self.assertRaises(ValueError):
            run["x"].log(values, **extra)

    # Numeric series: an empty list is accepted once the attribute exists.
    numbers = "some/num/val"
    run[numbers].log([5], step=1)
    run[numbers].log([])
    with self.assertRaises(ValueError):
        run[numbers].log("str")
    with self.assertRaises(TypeError):
        run[numbers].log(image)

    # String series.
    strings = "some/str/val"
    run[strings].log(["str"], step=1)
    run[strings].log([])

    # Image series: a string is treated as a path and must exist.
    images = "some/img/val"
    run[images].log([image], step=1)
    run[images].log([])
    with self.assertRaises(TypeError):
        run[images].log(5)
    with self.assertRaises(FileNotFound):
        run[images].log("path")

    # The rejected calls must not have altered the stored values.
    self.assertEqual(run["some"]["num"]["val"].fetch_last(), 5)
    self.assertEqual(run["some"]["str"]["val"].fetch_last(), "str")
    image_attribute = run.get_structure()["some"]["img"]["val"]
    self.assertIsInstance(image_attribute, FileSeries)
def handle_files_and_images(self):
    """Assign, download and log image and text files under ARTIFACT_ATTRIBUTE_SPACE."""
    space = ARTIFACT_ATTRIBUTE_SPACE

    # image
    pil_image = Image.open(self.img_path)
    image_file = File.as_image(pil_image)
    self.exp[space]["assigned image"] = image_file
    self.exp.wait()

    # Download once to the default name and once to a custom destination.
    with self.with_check_if_file_appears("assigned image.png"):
        self.exp[space]["assigned image"].download()
    with self.with_check_if_file_appears("custom_dest.png"):
        self.exp[space]["assigned image"].download("custom_dest.png")

    self.exp[space]["logged image"].log(image_file)

    # NOTE(review): the image file is opened in text mode ("r"); confirm
    # whether binary mode was intended here.
    with open(self.img_path, mode="r") as image_stream:
        # self.exp[ARTIFACT_ATTRIBUTE_SPACE]['assigned image stream'] = f
        self.exp[space]["logged image stream"].log(image_stream)

    # artifact
    text_file = neptune.types.File(self.text_file_path)
    self.exp[space]["assigned file"] = text_file
    # self.exp[ARTIFACT_ATTRIBUTE_SPACE]['logged file'].log(text_file)  # wrong type

    with open(self.text_file_path, mode="r") as text_stream:
        self.exp[space]["assigned file stream"] = text_stream
        self.exp[space]["logged file stream"].log(text_stream)
def validation_step(self, batch, batch_idx):
    """Run one validation step; for the first batch also log an example plot.

    Returns whatever ``self._training_or_validation_step`` returns when
    called with ``is_train_step=False``.
    """
    is_first_batch = batch_idx == 0
    if is_first_batch:
        # Plot example
        prediction = self(batch)
        figure = plot_example(batch, prediction)
        plot_series = self.logger.experiment['validation/plot']
        plot_series.log(File.as_image(figure))

    return self._training_or_validation_step(batch, is_train_step=False)
def test_as_pickle(self):
    """File.as_pickle must yield a "pkl" file whose content equals pickle.dumps of the object."""
    # given
    payload = {"a": [b"xyz", 34], "b": 1246}

    # when
    pickled_file = File.as_pickle(payload)

    # then
    expected_content = pickle.dumps(payload)
    self.assertEqual(pickled_file.extension, "pkl")
    self.assertEqual(pickled_file.content, expected_content)
def test_log(self):
    """Log single values of each kind and check what the series end up holding."""
    run = init(mode="debug", flush_period=0.5)

    run["some/num/val"].log(5)
    run["some/str/val"].log("some text")

    # Images are accepted both wrapped in a file value and as raw PIL images.
    wrapped_image = FileVal.as_image(PIL.Image.new("RGB", (60, 30), color="red"))
    run["some/img/val"].log(wrapped_image)
    raw_image = PIL.Image.new("RGB", (60, 30), color="red")
    run["some/img/val"].log(raw_image)

    self.assertEqual(run["some"]["num"]["val"].fetch_last(), 5)
    self.assertEqual(run["some"]["str"]["val"].fetch_last(), "some text")
    image_attribute = run.get_structure()["some"]["img"]["val"]
    self.assertIsInstance(image_attribute, FileSeries)
def test_create_from_path(self):
    """Check path, content and extension of File objects built from a path."""
    # (path, extra constructor kwargs, expected extension)
    cases = (
        ("some/path.ext", {}, "ext"),
        # Only the last suffix counts.
        ("some/path.txt.ext", {}, "ext"),
        # A dot in a directory name is not an extension.
        ("so.me/path", {}, ""),
        # An explicit extension wins over the one in the path.
        ("some/path.ext", {"extension": "txt"}, "txt"),
    )
    for path, extra, expected_extension in cases:
        file = File(path, **extra)
        self.assertEqual(path, file.path)
        self.assertEqual(None, file.content)
        self.assertEqual(expected_extension, file.extension)
def test_as_image(self):
    """File.as_image on a PIL image must yield a "png" file holding that image's data."""
    # given
    pixels = numpy.random.rand(10, 10) * 255
    pil_image = Image.fromarray(pixels.astype(numpy.uint8))

    # when
    image_file = File.as_image(pil_image)

    # then
    self.assertEqual(image_file.extension, "png")
    self.assertEqual(image_file.content, _get_pil_image_data(pil_image))
def validation_step(self, batch, batch_idx):
    """Run one validation step; for the first batch also log an example plot.

    The plot is configured from the ``params`` mapping (``history_len``,
    ``forecast_len``, ``nwp_channels``). Returns whatever
    ``self._training_or_validation_step`` returns when called with
    ``is_train_step=False``.
    """
    is_first_batch = batch_idx == 0
    if is_first_batch:
        # Plot example
        prediction = self(batch)
        figure = plot_example(
            batch,
            prediction,
            history_len=params['history_len'],
            forecast_len=params['forecast_len'],
            nwp_channels=params['nwp_channels'],
        )
        plot_series = self.logger.experiment['validation/plot']
        plot_series.log(File.as_image(figure))

    return self._training_or_validation_step(batch, is_train_step=False)
def test_save_download_text_stream_to_given_destination(self):
    """Upload a text stream, download it to an explicit path and compare the content."""
    run = init(mode="debug", flush_period=0.5)
    text = "Some test content of the stream"

    run["some/num/attr_name"] = FileVal.from_stream(StringIO(text))
    stored_attribute = run.get_structure()["some"]["num"]["attr_name"]
    self.assertIsInstance(stored_attribute, File)

    with create_file() as temp_filename:
        run["some/num/attr_name"].download(temp_filename)
        with open(temp_filename, "rt") as downloaded:
            self.assertEqual(downloaded.read(), text)
def test_create_from_bytes_io(self):
    """Check File.from_stream on BytesIO: defaults, pre-advanced stream, extension, seek."""
    payload = b"aaabbbccc"

    # Fresh stream, all defaults.
    from_fresh = File.from_stream(BytesIO(payload))
    self.assertEqual(None, from_fresh.path)
    self.assertEqual(payload, from_fresh.content)
    self.assertEqual("bin", from_fresh.extension)

    # A stream that was already advanced is still expected to be read in full.
    advanced_stream = BytesIO(payload)
    advanced_stream.seek(3)
    from_advanced = File.from_stream(advanced_stream)
    self.assertEqual(None, from_advanced.path)
    self.assertEqual(payload, from_advanced.content)
    self.assertEqual("bin", from_advanced.extension)

    # Explicit extension.
    with_extension = File.from_stream(BytesIO(payload), extension="png")
    self.assertEqual(None, with_extension.path)
    self.assertEqual(payload, with_extension.content)
    self.assertEqual("png", with_extension.extension)

    # Explicit seek: only the data from offset 5 onwards is expected.
    with_seek = File.from_stream(BytesIO(payload), seek=5)
    self.assertEqual(None, with_seek.path)
    self.assertEqual(b"bccc", with_seek.content)
    self.assertEqual("bin", with_seek.extension)
def test_create_from_string_io(self):
    """Check File.from_stream on StringIO: defaults, pre-advanced stream, extension, seek."""
    text = "aaabbbccc"

    # Fresh stream, all defaults: content is expected as bytes, extension "txt".
    from_fresh = File.from_stream(StringIO(text))
    self.assertEqual(None, from_fresh.path)
    self.assertEqual(b"aaabbbccc", from_fresh.content)
    self.assertEqual("txt", from_fresh.extension)

    # A stream that was already advanced is still expected to be read in full.
    advanced_stream = StringIO(text)
    advanced_stream.seek(3)
    from_advanced = File.from_stream(advanced_stream)
    self.assertEqual(None, from_advanced.path)
    self.assertEqual(b"aaabbbccc", from_advanced.content)
    self.assertEqual("txt", from_advanced.extension)

    # Explicit extension.
    with_extension = File.from_stream(StringIO(text), extension="png")
    self.assertEqual(None, with_extension.path)
    self.assertEqual(b"aaabbbccc", with_extension.content)
    self.assertEqual("png", with_extension.extension)

    # Explicit seek: only the data from offset 5 onwards is expected.
    with_seek = File.from_stream(StringIO(text), seek=5)
    self.assertEqual(None, with_seek.path)
    self.assertEqual(b"bccc", with_seek.content)
    self.assertEqual("txt", with_seek.extension)
def __init__(self, values, **kwargs):
    """Build the series from a collection of file-like values.

    Args:
        values: collection whose elements are each passed through
            ``File.create_from``.
        **kwargs: optional ``name`` and ``description``; anything else only
            triggers a warning written to stderr.

    Raises:
        TypeError: if ``values`` is not a collection.
    """
    if not is_collection(values):
        raise TypeError("`values` is not a collection")

    self._values = list(map(File.create_from, values))
    self.name = kwargs.pop("name", None)
    self.description = kwargs.pop("description", None)

    if not kwargs:
        return
    # Whatever is left after popping the known keys is unexpected.
    warning = "Warning: unexpected arguments ({kwargs}) in FileSeries".format(
        kwargs=kwargs)
    click.echo(warning, err=True)
def plot_images_on_weblogger(dataset_name, stats, images, labels, more, log_text, weblogger=2):
    """Log up to four example images of a dataset to a Neptune run.

    Args:
        dataset_name: name inserted into the metric key.
        stats: mapping with 'mean' and 'std' entries forwarded to
            ``convert_normalized_tensor_to_plottable_array``.
        images: sliceable sequence of images; only the first four are used.
        labels: sliceable sequence of labels, or None for empty labels.
        more: optional dict; when it has an 'image_name' entry, those names
            (without file extension) are appended to the label text.
        log_text: text inserted into the metric key.
        weblogger: nothing is logged unless this is a ``neptune.run.Run``.
    """
    # The original sliced with np.max((4, len(...))), which never truncates;
    # the cap of four examples is what the constant was evidently meant for.
    max_examples = 4
    plot_images = images[:max_examples]

    if labels is None:
        labels = np.repeat('', len(plot_images))
    labels = labels[:max_examples]

    add_text = [''] * len(labels)
    if isinstance(more, dict) and 'image_name' in more:
        add_text = more['image_name']

    metric_str = 'Debug/{} example images: [{}]'.format(log_text, dataset_name)

    if isinstance(weblogger, neptune.run.Run):
        # zip stops at the shortest of the three sequences.
        for im, lb, n in zip(plot_images, labels, add_text):
            caption = f'{lb}' + os.path.splitext(n)[0]
            plottable = convert_normalized_tensor_to_plottable_array(
                im, stats['mean'], stats['std'], text=caption)
            weblogger[metric_str].log(File.as_image(plottable / 255))
def _get_base64_image_content(file: File) -> str:
    """Return the base64-encoded content of an image file.

    A file given by path is read from disk first; a file that already
    carries content is used directly.

    Raises:
        FileNotFound: if ``file.path`` is set but does not exist.
        OperationNotSupported: if ``imghdr`` does not recognise the content
            as an image.
    """
    source_path = file.path
    if source_path is not None:
        if not os.path.exists(source_path):
            raise FileNotFound(source_path)
        with open(source_path, "rb") as image_file:
            file = File.from_stream(image_file)

    detected_format = imghdr.what("", h=file.content)
    if detected_format:
        return base64_encode(file.content)

    raise OperationNotSupported(
        "FileSeries supports only image files for now. "
        "Other file types will be implemented in future."
    )
def test_log_path(self):
    """Logging an image file given by path must enqueue one LogImages with its content."""
    # given
    wait = self._random_wait()
    path = self._random_path()
    processor = MagicMock()
    run = self._create_run(processor=processor)
    series = FileSeries(run, path)
    image_file = File.as_image(numpy.random.rand(10, 10) * 255)

    with create_file(image_file.content, binary_mode=True) as tmp_filename:
        file_on_disk = File(tmp_filename)

        # when
        timestamp = self._now()
        series.log(
            file_on_disk,
            step=3,
            timestamp=timestamp,
            wait=wait,
            description="something",
        )

        # then
        logged_value = LogImages.ValueType(
            ImageValue(base64_encode(image_file.content), None, "something"),
            3,
            self._now(),
        )
        expected_operation = LogImages(path, [logged_value])
        processor.enqueue_operation.assert_called_once_with(
            expected_operation, wait)
def test_as_html(self):
    """File.as_html on a bokeh figure must yield an "html" file starting with an HTML doctype."""
    # given
    from bokeh.plotting import figure

    plot = figure(plot_width=400, plot_height=400)
    plot.circle(size=20, color="navy", alpha=0.5)

    # when
    html_file = File.as_html(plot)

    # then
    expected_prefix = '\n\n\n\n<!DOCTYPE html>\n<html lang="en">'.encode("utf-8")
    self.assertEqual(html_file.extension, "html")
    self.assertTrue(html_file.content.startswith(expected_prefix))
def on_epoch_end(self, trainer, pl_module):
    """Upload an HTML table of the datasets found in each monitored sentence.

    Does nothing when no run is attached. The table has one row per
    sentence, with the found dataset names joined by '|', and is uploaded
    under ``display_<current epoch>``.
    """
    if self.run is None:
        return

    joined_per_sentence = [
        '|'.join(get_datasets_from_sentence(pl_module, self.tokenizer, sentence))
        for sentence in self.sentences
    ]
    table = pd.DataFrame({
        'sentence': self.sentences,
        'datasets': joined_per_sentence,
    })
    self.run[f'display_{pl_module.current_epoch}'].upload(File.as_html(table))
def log_series(self):
    """Log a few float, text and image values under LOG_ATTRIBUTE_SPACE."""
    space = LOG_ATTRIBUTE_SPACE

    # floats
    for number in (1, 2, 3, 2):
        self.exp[space]["m1"].log(number)
    self.exp[space]["nested"]["m1"].log(1)

    # texts
    for text in ("a", "b", "c"):
        self.exp[space]["m2"].log(text)

    # images
    pil_image = Image.open(self.img_path)
    image_file = File.as_image(pil_image)
    self.exp[space]["g_img"].log(image_file)
def test_save_download_binary_stream_to_default_destination(self):
    """Upload a binary stream, download it without a destination and compare the content."""
    run = init(mode="debug", flush_period=0.5)
    payload = b"Some test content of the stream"

    run["some/num/attr_name"] = FileVal.from_stream(BytesIO(payload))
    stored_attribute = run.get_structure()["some"]["num"]["attr_name"]
    self.assertIsInstance(stored_attribute, File)

    abspath_target = (
        "neptune.new.internal.backends.neptune_backend_mock.os.path.abspath")
    with TemporaryDirectory() as temp_dir:

        def _inside_temp_dir(path):
            # Redirect the default destination into the temporary directory.
            return os.path.normpath(temp_dir + "/" + path)

        with patch(abspath_target) as abspath_mock:
            abspath_mock.side_effect = _inside_temp_dir
            run["some/num/attr_name"].download()

        # The default file name is expected to be "<attribute name>.bin".
        with open(temp_dir + "/attr_name.bin", "rb") as downloaded:
            self.assertEqual(downloaded.read(), payload)
def test_assign_series(self):
    """Assign whole series values, then re-assign them, checking the last value each time."""
    run = init(mode="debug", flush_period=0.5)

    # First assignment of each series kind.
    run["some/num/val"].assign(FloatSeriesVal([1, 2, 0, 10]))
    run["some/str/val"].assign(StringSeriesVal(["text1", "text2"]), wait=True)
    red_image = FileVal.as_image(PIL.Image.new("RGB", (10, 15), color="red"))
    run["some/img/val"].assign(FileSeriesVal([red_image]))

    self.assertEqual(run["some"]["num"]["val"].fetch_last(), 10)
    self.assertEqual(run["some"]["str"]["val"].fetch_last(), "text2")
    image_attribute = run.get_structure()["some"]["img"]["val"]
    self.assertIsInstance(image_attribute, FileSeries)

    # Re-assigning replaces the previous content.
    run["some/num/val"].assign(FloatSeriesVal([122, 543, 2, 5]))
    replacement_texts = StringSeriesVal(["other 1", "other 2", "other 3"])
    run["some/str/val"].assign(replacement_texts, wait=True)

    self.assertEqual(run["some"]["num"]["val"].fetch_last(), 5)
    self.assertEqual(run["some"]["str"]["val"].fetch_last(), "other 3")
def on_epoch_end(self, trainer, pl_module):
    """Upload an HTML table of the unique datasets found in each document.

    Does nothing when no run is attached. The table has one row per
    document, with the de-duplicated dataset names joined by '|', and is
    uploaded under ``submission_<current epoch>``. A document with no valid
    sentences gets an empty string instead of raising.
    """
    if self.run is None:
        return

    ids = []
    all_datasets = []
    for doc in self.docs:
        ids.append(doc.document_id)
        sentences = SentenceExtractor.get_all_valid_sentences(doc)
        # A set comprehension replaces reduce(list + list): it copes with an
        # empty sentence list and avoids repeated list concatenation.
        unique_datasets = {
            dataset
            for sentence in sentences
            for dataset in get_datasets_from_sentence(
                pl_module, self.tokenizer, sentence)
        }
        all_datasets.append('|'.join(unique_datasets))

    df = pd.DataFrame({'document_id': ids, 'datasets': all_datasets})
    self.run[f'submission_{pl_module.current_epoch}'].upload(
        File.as_html(df))
def test_fetch_dict(self):
    """Fetching a namespace must return only its atomic values, nested by sub-namespace."""
    now = datetime.now()
    run = init(mode="debug", flush_period=0.5)

    run["params/int"] = 1
    run["params/float"] = 3.14
    run["params/bool"] = True
    run["params/datetime"] = now
    run["params/sub-namespace/int"] = 42
    run["params/sub-namespace/string"] = "Some text"

    # attributes to be ignored
    run["params/sub-namespace/string_series"].log("Some text #1")
    run["params/sub-namespace/int_series"].log(100)
    run["some/num/attr_name"] = FileVal.from_stream(BytesIO(b"Some stream"))

    # pylint: disable=assignment-from-no-return
    # that's a false positive, pylint looks at Handler.fetch()
    fetched = run["params"].fetch()

    # The expected datetime is truncated to whole milliseconds.
    now_in_millis = now.replace(
        microsecond=1000 * int(now.microsecond / 1000))
    expected = {
        "int": 1,
        "float": 3.14,
        "bool": True,
        "datetime": now_in_millis,
        "sub-namespace": {
            "int": 42,
            "string": "Some text",
        },
    }
    self.assertDictEqual(fetched, expected)
def _log_study(run, study: optuna.Study):
    """Log the study's name and storage details to ``run`` (best effort).

    For an in-memory storage the study object itself is pickled and logged
    under 'study/study'. For the other recognised storages the storage type
    and URL are logged. Any AttributeError raised along the way is
    swallowed, so logging may stop part-way through.
    """
    try:
        # Exact type match on purpose (not isinstance), as in the original check.
        uses_in_memory_storage = (
            type(study._storage) is optuna.storages._in_memory.InMemoryStorage)
        run['study/study_name'] = study.study_name

        if uses_in_memory_storage:
            # Pickle the study object and log it under 'study/study'.
            run['study/storage_type'] = 'InMemoryStorage'
            run['study/study'] = File.as_pickle(study)
            return

        storage = study._storage
        if isinstance(storage, optuna.storages.RedisStorage):
            run['study/storage_type'] = "RedisStorage"
            run['study/storage_url'] = storage._url
        elif isinstance(storage, optuna.storages._CachedStorage):
            # apparently CachedStorage typically wraps RDBStorage
            run['study/storage_type'] = "RDBStorage"
            run['study/storage_url'] = storage._backend.url
        elif isinstance(storage, optuna.storages.RDBStorage):
            run['study/storage_type'] = "RDBStorage"
            run['study/storage_url'] = storage.url
        else:
            run['study/storage_type'] = "unknown storage type"
            run['study/storage_url'] = "unknown storage url"
    except AttributeError:
        pass