Ejemplo n.º 1
0
    def from_file(filename: str) -> 'Result':
        """Build a Result from the transactions stored in ``filename``.

        The file is first rewritten in place: its content is piped through a
        JSON decode step, ``ResultPromote`` and a JSON encode step, and the
        output is written back to the same path (opened with mode ``'w'``).
        The rewritten file is then decoded once more and passed to
        ``Result.from_transactions``.
        """

        # NOTE: a previous guard returned an empty Result() when `filename` was
        # None or did not exist on disk. It is intentionally disabled because it
        # was judged confusing in practice, so no such check is performed here.

        encoder = Cartesian(JsonEncode())
        decoder = Cartesian(JsonDecode())

        # Pass 1: promote the stored transactions and overwrite the file.
        promote = Pipe.join(
            DiskSource(filename),
            [decoder, ResultPromote(), encoder],
            DiskSink(filename, 'w'),
        )
        promote.run()

        # Pass 2: read the promoted transactions back and build the Result.
        transactions = Pipe.join(DiskSource(filename), [decoder]).read()

        return Result.from_transactions(transactions)
Ejemplo n.º 2
0
class ReaderSimulation(Simulation):
    """A simulation whose interactions are parsed from a line-based source.

    The configured reader turns the source's lines into rows; one column of
    those rows supplies the labels and the remaining columns supply the
    features handed to ``ClassificationSimulation``.
    """

    def __init__(self,
                 reader: Filter[Iterable[str], Any],
                 source: Union[str, Source[Iterable[str]]],
                 label_column: Union[str, int],
                 with_header: bool = True) -> None:
        """Store the reader, resolve the source and remember the label column.

        Args:
            reader: Filter applied to whatever the source's ``read()`` returns.
            source: A string starting with ``'http'`` (read through
                ``HttpSource`` and ``ResponseToLines``), any other string
                (read through ``DiskSource``), or a source object used as is.
            label_column: Header name (str) or positional index (int) of the
                label column.
            with_header: Whether the first parsed row is a header row.
        """

        self._reader = reader

        if not isinstance(source, str):
            # Already a source object; use it unchanged.
            self._source = source
        elif source.startswith('http'):
            self._source = Pipe.join(HttpSource(source), [ResponseToLines()])
        else:
            self._source = DiskSource(source)

        self._label_column = label_column
        self._with_header = with_header
        self._interactions = cast(Optional[Sequence[Interaction]], None)

    def read(self) -> Iterable[Interaction]:
        """Return the interactions parsed from the source."""
        return self._load_interactions()

    def _load_interactions(self) -> Sequence[Interaction]:
        """Parse the source into feature rows and labels, then build interactions."""
        rows = iter(self._reader.filter(self._source.read()))

        # When a header is present it is consumed here, leaving only data rows.
        header = next(rows) if self._with_header else []

        label_key = self._label_column
        label_index = header.index(label_key) if isinstance(label_key, str) else label_key

        # Work column-wise so the label column can be removed in one step.
        columns = list(Transpose().filter(rows))
        labels = columns.pop(label_index)
        features = list(Transpose().filter(Flatten().filter(columns)))

        # A label column made of exactly two tuples is handled as a sparse
        # encoding: the tuples are zipped into (row index, label value) pairs.
        sparse = (len(labels) == 2
                  and isinstance(labels[0], tuple)
                  and isinstance(labels[1], tuple))

        if not sparse:
            dense_labels: List[Any] = list(labels)
        else:
            # Rows that are not mentioned in the sparse column keep the label '0'.
            dense_labels = ['0'] * len(features)

            for row_index, value in zip(*labels):  #type:ignore
                dense_labels[row_index] = value

        return ClassificationSimulation(features, dense_labels).read()

    def __repr__(self) -> str:
        """Represent the simulation by the string form of its source."""
        return str(self._source)
Ejemplo n.º 3
0
    def __init__(self,
                 reader: Filter[Iterable[str], Any],
                 source: Union[str, Source[Iterable[str]]],
                 label_column: Union[str, int],
                 with_header: bool = True) -> None:
        """Store the reader, resolve the source and remember the label column.

        Args:
            reader: Filter kept on the instance as ``_reader``.
            source: A string starting with ``'http'`` (wrapped in
                ``HttpSource`` followed by ``ResponseToLines``), any other
                string (wrapped in ``DiskSource``), or a source object that
                is stored unchanged.
            label_column: Name or index of the label column.
            with_header: Flag kept on the instance as ``_with_header``.
        """

        self._reader = reader

        if not isinstance(source, str):
            # Already a source object; use it unchanged.
            self._source = source
        elif source.startswith('http'):
            self._source = Pipe.join(HttpSource(source), [ResponseToLines()])
        else:
            self._source = DiskSource(source)

        self._label_column = label_column
        self._with_header = with_header
        self._interactions = cast(Optional[Sequence[Interaction]], None)
Ejemplo n.º 4
0
    def from_file(arg) -> 'Benchmark':  #type: ignore
        """Instantiate a Benchmark from a config file.

        ``arg`` may be a string starting with ``'http'`` (fetched through
        ``HttpSource``), any other string (read through ``DiskSource``), or an
        object exposing ``read()`` whose result is used as the content
        directly. The content is JSON-decoded and passed to the filter
        constructed from ``CobaConfig.Benchmark['file_fmt']``.
        """

        if not isinstance(arg, str):
            # Not a location string: rely on the object's own read().
            content = arg.read()  #type: ignore

        elif arg.startswith('http'):
            response_lines = ResponseToLines().filter(HttpSource(arg).read())
            content = '\n'.join(response_lines)

        else:
            content = '\n'.join(DiskSource(arg).read())

        # Build the format-specific filter first, then decode, matching the
        # order in which a single chained expression would evaluate them.
        file_format = CobaRegistry.construct(CobaConfig.Benchmark['file_fmt'])
        definition = JsonDecode().filter(content)

        return file_format.filter(definition)
Ejemplo n.º 5
0
    def test_from_definition_source(self):
        """Environments.from_file should accept a DiskSource for a definition file."""
        env_path = "coba/tests/.temp/from_file.env"
        env_file = Path(env_path)

        # Remove anything left behind by an earlier run.
        if env_file.exists():
            env_file.unlink()

        try:
            env_file.write_text(
                '{ "environments" : { "OpenmlSimulation": 150 } }')

            env = Environments.from_file(DiskSource(env_path))

            self.assertEqual(1, len(env))

            params = env[0].params
            self.assertEqual(150, params['openml'])
            self.assertEqual(False, params['cat_as_str'])

        finally:
            # Always clean up the temporary definition file.
            if env_file.exists():
                env_file.unlink()
Ejemplo n.º 6
0
    def __init__(self, transaction_log: Optional[str] = None) -> None:
        """Choose the transaction IO implementation for ``transaction_log``.

        When the log exists on disk, the version is taken from index 1 of the
        JSON-decoded first entry read from it. Version 3 selects
        ``TransactionIO_V3``; version 4, or no readable log at all, selects
        ``TransactionIO_V4``.

        Raises:
            CobaException: If the log reports any other version.
        """

        if transaction_log and Path(transaction_log).exists():
            first_entry = next(DiskSource(transaction_log).read())
            version = JsonDecode().filter(first_entry)[1]
        else:
            # No path given, or nothing on disk yet.
            version = None

        if version == 3:
            transaction_io = TransactionIO_V3(transaction_log)

        elif version == 4 or version is None:
            transaction_io = TransactionIO_V4(transaction_log)

        else:
            raise CobaException("We were unable to determine the appropriate Transaction reader for the file.")

        self._transactionIO = transaction_io
Ejemplo n.º 7
0
 def test_is_picklable(self):
     """Pickling a DiskSource must not raise."""
     source = DiskSource("coba/tests/.temp/test.gz")
     pickle.dumps(source)
Ejemplo n.º 8
0
 def test_simple_with_gz(self):
     """Reading a gzip-compressed file should yield its three text lines."""
     gz_path = "coba/tests/.temp/test.gz"
     Path(gz_path).write_bytes(gzip.compress(b'a\nb\nc'))

     lines = list(DiskSource(gz_path).read())

     self.assertEqual(["a", "b", "c"], lines)
Ejemplo n.º 9
0
 def test_simple_sans_gz(self):
     """Reading a plain text file should yield its three lines."""
     log_path = "coba/tests/.temp/test.log"
     Path(log_path).write_text("a\nb\nc")

     lines = list(DiskSource(log_path).read())

     self.assertEqual(["a", "b", "c"], lines)
Ejemplo n.º 10
0
    def __init__(self, log_file: Optional[str] = None, minify:bool=True) -> None:
        """Set up the source and sink used for the log.

        With a truthy ``log_file`` both ends are disk-backed on that path.
        Otherwise a ``ListSource`` is created and the ``ListSink`` is built
        from that source's ``items``.
        """

        self._log_file = log_file
        self._minify = minify

        if log_file:
            self._source = DiskSource(log_file)
            self._sink = DiskSink(log_file)
        else:
            self._source = ListSource()
            # The sink is constructed from the source's own items.
            self._sink = ListSink(self._source.items)