def from_file(filename: str) -> 'Result':
    """Create a Result from a transaction file.

    The file is processed in two passes:
      1. Every line is JSON-decoded, run through ``ResultPromote`` and JSON-encoded
         again, with the output written back to ``filename`` (opened in ``'w'`` mode).
      2. The rewritten file is JSON-decoded once more and the decoded transactions
         are handed to ``Result.from_transactions``.

    Args:
        filename: Path of the transaction file to load (and rewrite in place).

    Returns:
        The Result built from the transactions stored in ``filename``.

    Note:
        A ``None`` or non-existent ``filename`` is not special-cased here. An earlier
        guard that returned an empty ``Result()`` in that situation was disabled
        because it proved confusing in practice.
    """
    encoder = Cartesian(JsonEncode())
    decoder = Cartesian(JsonDecode())

    # Pass 1: promote the stored transactions and write them back to the same path.
    promote_in_place = Pipe.join(
        DiskSource(filename),
        [decoder, ResultPromote(), encoder],
        DiskSink(filename, 'w'),
    )
    promote_in_place.run()

    # Pass 2: read the (now promoted) transactions and build the Result from them.
    promoted_transactions = Pipe.join(DiskSource(filename), [decoder]).read()
    return Result.from_transactions(promoted_transactions)
class ReaderSimulation(Simulation):
    """A simulation whose interactions are built from rows parsed out of a line source.

    The lines come from a URL, a file on disk, or a caller-supplied source. They are
    parsed by ``reader``, one column is split off as the labels, and the remaining
    columns become the features handed to ``ClassificationSimulation``.
    """

    def __init__(self, reader: Filter[Iterable[str], Any], source: Union[str, Source[Iterable[str]]], label_column: Union[str, int], with_header: bool = True) -> None:
        """Instantiate a ReaderSimulation.

        Args:
            reader: Filter that turns the raw lines into parsed rows.
            source: Where the lines come from. A string starting with ``'http'`` is
                fetched over HTTP, any other string is read from disk, and anything
                else is used as the source directly.
            label_column: Name (looked up in the header row) or index of the label column.
            with_header: Whether the first parsed row is a header row.
        """
        self._reader = reader

        if not isinstance(source, str):
            self._source = source
        elif source.startswith('http'):
            self._source = Pipe.join(HttpSource(source), [ResponseToLines()])
        else:
            self._source = DiskSource(source)

        self._label_column = label_column
        self._with_header = with_header
        self._interactions = cast(Optional[Sequence[Interaction]], None)

    def read(self) -> Iterable[Interaction]:
        """Read the interactions in this simulation."""
        return self._load_interactions()

    def _load_interactions(self) -> Sequence[Interaction]:
        """Parse the source and build the interactions from its feature and label columns."""
        rows = iter(self._reader.filter(self._source.read()))

        # The header (when present) is consumed first so it never reaches the data columns.
        header = next(rows) if self._with_header else []

        if isinstance(self._label_column, str):
            label_index = header.index(self._label_column)
        else:
            label_index = self._label_column

        # Work column-wise so the label column can be popped off in one step.
        columns = list(Transpose().filter(rows))
        labels = columns.pop(label_index)
        features = list(Transpose().filter(Flatten().filter(columns)))

        # A label column made of exactly two tuples is treated as a sparse encoding:
        # the first tuple is paired element-wise with the second as (row, value).
        has_sparse_labels = (
            len(labels) == 2
            and isinstance(labels[0], tuple)
            and isinstance(labels[1], tuple)
        )

        if not has_sparse_labels:
            dense_labels: List[Any] = list(labels)
        else:
            # Rows without an explicit sparse entry keep the default label '0'.
            dense_labels = ['0'] * len(features)
            for row, value in zip(*labels):  # type: ignore
                dense_labels[row] = value

        return ClassificationSimulation(features, dense_labels).read()

    def __repr__(self) -> str:
        """Represent the simulation by the string form of its source."""
        return str(self._source)
def __init__(self, reader: Filter[Iterable[str], Any], source: Union[str, Source[Iterable[str]]], label_column: Union[str, int], with_header: bool = True) -> None:
    """Instantiate the simulation from a reader and a line source.

    Args:
        reader: Filter that turns the raw lines into parsed rows.
        source: Where the lines come from. A string starting with ``'http'`` is
            fetched over HTTP, any other string is read from disk, and anything
            else is used as the source directly.
        label_column: Name or index of the column holding the labels.
        with_header: Whether the first parsed row is a header row.
    """
    self._reader = reader

    # Resolve the source: non-strings pass through untouched, strings are
    # dispatched on their 'http' prefix.
    if not isinstance(source, str):
        self._source = source
    elif source.startswith('http'):
        self._source = Pipe.join(HttpSource(source), [ResponseToLines()])
    else:
        self._source = DiskSource(source)

    self._label_column = label_column
    self._with_header = with_header
    self._interactions = cast(Optional[Sequence[Interaction]], None)
def from_file(arg) -> 'Benchmark': #type: ignore
    """Instantiate a Benchmark from a config file.

    Args:
        arg: Where the config comes from. A string starting with ``'http'`` is
            fetched over HTTP, any other string is read from disk, and any
            non-string is expected to expose a ``read()`` returning the content.

    Returns:
        The result of filtering the JSON-decoded config through the object that
        ``CobaRegistry`` constructs for ``CobaConfig.Benchmark['file_fmt']``.
    """
    if not isinstance(arg, str):
        content = arg.read() #type: ignore
    elif arg.startswith('http'):
        response_lines = ResponseToLines().filter(HttpSource(arg).read())
        content = '\n'.join(response_lines)
    else:
        content = '\n'.join(DiskSource(arg).read())

    # The format handler is constructed before the content is decoded.
    file_format = CobaRegistry.construct(CobaConfig.Benchmark['file_fmt'])
    decoded_config = JsonDecode().filter(content)

    return file_format.filter(decoded_config)
def test_from_definition_source(self):
    """Environments.from_file accepts a DiskSource pointing at an environment definition."""
    env_path = "coba/tests/.temp/from_file.env"
    env_file = Path(env_path)

    # Start from a clean slate in case an earlier run left the file behind.
    if env_file.exists():
        env_file.unlink()

    try:
        env_file.write_text('{ "environments" : { "OpenmlSimulation": 150 } }')

        env = Environments.from_file(DiskSource(env_path))

        self.assertEqual(1, len(env))
        self.assertEqual(150, env[0].params['openml'])
        self.assertEqual(False, env[0].params['cat_as_str'])
    finally:
        # Always remove the temporary definition, even when an assertion fails.
        if env_file.exists():
            env_file.unlink()
def __init__(self, transaction_log: Optional[str] = None) -> None:
    """Pick the transaction reader/writer that matches the log's format version.

    The version is taken from index 1 of the JSON-decoded first line of the log.
    A log that is not given, does not exist, or yields no lines has no version
    and is handled by the V4 implementation.

    Args:
        transaction_log: Path to the transaction log, or ``None`` for no log file.

    Raises:
        CobaException: If the log declares a version other than 3 or 4.
    """
    version = None

    if transaction_log and Path(transaction_log).exists():
        # An existing log may yield no lines at all (e.g. an empty file). Use a
        # default so that case falls through to "no version" instead of leaking
        # StopIteration out of the constructor.
        first_line = next(iter(DiskSource(transaction_log).read()), None)

        if first_line is not None:
            version = JsonDecode().filter(first_line)[1]

    if version == 3:
        self._transactionIO = TransactionIO_V3(transaction_log)
    elif version == 4 or version is None:
        # Logs without a version are handled by the V4 format.
        self._transactionIO = TransactionIO_V4(transaction_log)
    else:
        raise CobaException("We were unable to determine the appropriate Transaction reader for the file.")
def test_is_picklable(self):
    """Pickling a DiskSource must not raise."""
    source = DiskSource("coba/tests/.temp/test.gz")
    pickle.dumps(source)
def test_simple_with_gz(self):
    """A gzip-compressed file is expected to be read back as its plain-text lines."""
    compressed = gzip.compress(b'a\nb\nc')
    Path("coba/tests/.temp/test.gz").write_bytes(compressed)

    actual_lines = list(DiskSource("coba/tests/.temp/test.gz").read())

    self.assertEqual(["a", "b", "c"], actual_lines)
def test_simple_sans_gz(self):
    """A plain-text file is expected to be read back line by line without newlines."""
    Path("coba/tests/.temp/test.log").write_text("a\nb\nc")

    actual_lines = list(DiskSource("coba/tests/.temp/test.log").read())

    self.assertEqual(["a", "b", "c"], actual_lines)
def __init__(self, log_file: Optional[str] = None, minify:bool=True) -> None:
    """Set up the source/sink pair used to read and write the log.

    Args:
        log_file: Path of the log file. When falsy (``None`` or empty), an
            in-memory list source and a list sink over its items are used.
        minify: Stored as ``self._minify``; not otherwise used in the constructor.
    """
    self._log_file = log_file
    self._minify = minify

    if log_file:
        # Disk-backed: read from and write to the same file path.
        self._source = DiskSource(log_file)
        self._sink = DiskSink(log_file)
    else:
        # In-memory: the sink is built over the source's own items.
        self._source = ListSource()
        self._sink = ListSink(self._source.items)