Ejemplo n.º 1
0
    def evaluate(self, learners: Sequence[Learner[_C,_A]], transaction_log:str = None, seed:int = None) -> Result:
        """Collect observations of each Learner playing the benchmark's simulations and calculate Results.

        Args:
            learners: The learners to evaluate against this benchmark's simulations.
            transaction_log: Optional path to a transaction log. If the log already
                contains results from a previous run those are restored and the
                evaluation resumes, writing only new transactions.
            seed: Optional seed passed to each BenchmarkLearner.

        Returns:
            A Result built from the transactions collected by the transaction sink.
        """
        benchmark_learners   = [ BenchmarkLearner(learner, seed) for learner in learners ] #type: ignore
        restored             = Result.from_transaction_log(transaction_log)
        task_source          = TaskSource(self._simulation_pipes, benchmark_learners, restored)
        task_to_transactions = TaskToTransactions(self._ignore_raise)
        transaction_sink     = TransactionSink(transaction_log, restored)

        n_given_learners    = len(benchmark_learners)
        n_given_simulations = len(self._simulation_pipes)
 
        # A non-empty restored benchmark means we are resuming a previous run; the
        # learner/simulation counts must match or the log belongs to a different benchmark.
        if len(restored.benchmark) != 0:
            assert n_given_learners    == restored.benchmark['n_learners'   ], "The currently evaluating benchmark doesn't match the given transaction log"
            assert n_given_simulations == restored.benchmark['n_simulations'], "The currently evaluating benchmark doesn't match the given transaction log"

        preamble_transactions = []
        preamble_transactions.append(Transaction.version(TransactionPromote.CurrentVersion))
        preamble_transactions.append(Transaction.benchmark(n_given_learners, n_given_simulations))
        preamble_transactions.extend(Transaction.learners(benchmark_learners))

        # Fall back to the global execution config when per-benchmark settings weren't given.
        mp = self._processes if self._processes else ExecutionContext.Config.processes
        mt = self._maxtasksperchild if self._maxtasksperchild else ExecutionContext.Config.maxtasksperchild
        
        # The preamble runs single-process first so version/benchmark/learner rows are
        # written before any task output; evaluation then runs with configured parallelism.
        Pipe.join(MemorySource(preamble_transactions), []                    , transaction_sink).run(1,None)
        Pipe.join(task_source                        , [task_to_transactions], transaction_sink).run(mp,mt)

        return transaction_sink.result
Ejemplo n.º 2
0
    def test_multiprocess_singletask(self):
        # With 2 worker processes and maxtasksperchild=1 each item is handled by a
        # fresh worker, so all four recorded process names should be distinct.
        sink     = MemorySink()
        pipeline = Pipe.join(MemorySource(list(range(4))), [Pipe_Tests.ProcessNameFilter()], sink)

        pipeline.run(2, 1)

        self.assertEqual(len(set(sink.items)), 4)
Ejemplo n.º 3
0
    def test_single_process_multitask(self):
        # A default run() executes entirely on the main process, so every one of the
        # ten items should be tagged with 'MainProcess'.
        sink = MemorySink()

        Pipe.join(MemorySource(list(range(10))), [Pipe_Tests.ProcessNameFilter()], sink).run()

        expected = ['MainProcess' for _ in range(10)]
        self.assertEqual(sink.items, expected)
Ejemplo n.º 4
0
    def from_transaction_log(filename: Optional[str]) -> 'Result':
        """Create a Result from a transaction log file.

        Returns an empty Result when no filename is given or the file doesn't exist.
        """

        if filename is None:
            return Result()

        if not Path(filename).exists():
            return Result()

        # First rewrite the log in place, promoting its transactions to the current version.
        promote = [JsonDecode(), TransactionPromote(), JsonEncode()]
        Pipe.join(DiskSource(filename), promote, DiskSink(filename, 'w')).run()

        # Then read the promoted transactions back and build the Result from them.
        transactions = Pipe.join(DiskSource(filename), [JsonDecode()]).read()
        return Result.from_transactions(transactions)
Ejemplo n.º 5
0
    def test_logging(self):
        # Capture every logged (message, end) pair so we can compare against sink output.
        logged = []

        ExecutionContext.Logger = UniversalLogger(
            lambda msg, end: logged.append((msg, end)))

        sink = MemorySink()

        Pipe.join(MemorySource(list(range(4))), [Pipe_Tests.ProcessNameFilter()], sink).run(2, 1)

        self.assertEqual(len(logged), 4)
        # Each sink item equals its log message with the 20-character timestamp prefix removed.
        self.assertEqual(sink.items, [message[20:] for message, _ in logged])
Ejemplo n.º 6
0
 def __init__(self, 
     source             : Source[Simulation], 
     filters            : Sequence[Filter[Simulation,Union[Simulation, BatchedSimulation]]],
     source_description :str = "", 
     filter_descriptions:Sequence[str] = ()) -> None:
     """Wrap a simulation source with a pipeline of filters.

     Args:
         source: The simulation source, or an existing BenchmarkSimulation to extend.
         filters: Filters to apply to the source's simulation.
         source_description: A human-readable description of the source.
         filter_descriptions: Human-readable descriptions of the filters.

     Note: the filter_descriptions default was changed from a mutable `[]` to an
     immutable `()` to avoid the shared-mutable-default pitfall; it is only ever
     copied via list() so behavior is unchanged.
     """
     
     if isinstance(source, BenchmarkSimulation):
         # Flatten a nested BenchmarkSimulation: reuse its underlying source and
         # append our filters/descriptions to its existing ones.
         self._source             = source._source #type: ignore
         self._filter             = Pipe.FiltersFilter(list(source._filter._filters)+list(filters)) #type: ignore
         self.source_description  = source.source_description or source_description #type: ignore
         self.filter_descriptions = list(source.filter_descriptions) + list(filter_descriptions) #type: ignore
     else:
         self._source             = source
         self._filter             = Pipe.FiltersFilter(filters)
         self.source_description  = source_description
         self.filter_descriptions = list(filter_descriptions)
Ejemplo n.º 7
0
    def _process_task(self, task) -> Iterable[Any]:
        """Evaluate one grouped task, yielding simulation and batch Transactions.

        Args:
            task: A 4-tuple of (simulation_ids, learner_ids, learners, simulation_pipes).
                All pipes in the task share one source (tasks are grouped by source),
                so the source is read once and its output fanned out to each filter.

        Yields:
            A Transaction.simulation the first time each simulation id is seen, then a
            Transaction.batch of batch sizes and rewards per (simulation, learner) pair.
        """

        simulation_ids   = task[0]
        learner_ids      = task[1]
        learners         = task[2]
        
        simulation_pipes   = task[3]
        simulation_source  = simulation_pipes[0]._source #we only need one source since we group by sources when making tasks
        simulation_filters = [ simulation_pipe._filter for simulation_pipe in simulation_pipes ]

        collapsed_pipe = Pipe.join(simulation_source, [ForeachFilter(simulation_filters)])

        # Tracks simulation ids whose Transaction.simulation has already been yielded,
        # so each simulation's metadata is written only once per task.
        written_simulations = []

        try:
            for simulation_id, learner_id, learner, pipe, simulation in zip(simulation_ids, learner_ids, learners, simulation_pipes, collapsed_pipe.read()):
                batches      = simulation.interaction_batches
                interactions = list(chain.from_iterable(batches))

                if simulation_id not in written_simulations:
                    written_simulations.append(simulation_id)
                    yield Transaction.simulation(simulation_id,
                        source            = pipe.source_description,
                        filters           = pipe.filter_descriptions,
                        interaction_count = len(interactions),
                        batch_count       = len(batches),
                        context_size      = int(median(self._context_sizes(interactions))),
                        action_count      = int(median(self._action_counts(interactions))))

                # Deep-copy so each evaluation starts from a pristine, uninitialized learner.
                learner = deepcopy(learner)
                learner.init()

                if len(batches) > 0:
                    Ns, Rs = zip(*[ self._process_batch(batch, simulation.reward, learner) for batch in batches ])
                    yield Transaction.batch(simulation_id, learner_id, N=list(Ns), reward=list(Rs))

        except KeyboardInterrupt:
            raise
        except Exception as e:
            # Log every failure; only propagate when the benchmark isn't set to ignore errors.
            ExecutionContext.Logger.log_exception(e, "unhandled exception:")
            if not self._ignore_raise: raise e
Ejemplo n.º 8
0
 def __init__(self, transaction_log: Optional[str], restored: Result) -> None:
     """Route transactions to a JSON-encoded disk log when a path is given, else to memory."""
     if transaction_log:
         raw_sink = Pipe.join([JsonEncode()], DiskSink(transaction_log))
     else:
         raw_sink = MemorySink()
     # Filter out transactions already present in the restored Result before sinking.
     self._sink = Pipe.join([TransactionIsNew(restored)], raw_sink)
Ejemplo n.º 9
0
    def read(self) -> Tuple[Sequence[Sequence[Any]], Sequence[Any]]:
        """Download an OpenML dataset and return its (feature_rows, label_rows).

        Fetches the dataset description and feature metadata from the OpenML JSON
        API, builds per-column encoders, then downloads and cleans the CSV data.

        Returns:
            A tuple of (feature rows, label rows) as produced by LabeledCsvCleaner.

        Raises:
            Exception: If the OpenML dataset has been deactivated.
        """

        #placing some of these at the top would cause circular references
        from coba.data.pipes import Pipe
        from coba.data.encoders import Encoder, NumericEncoder, OneHotEncoder, StringEncoder
        from coba.data.filters import CsvReader, LabeledCsvCleaner

        data_id = self._data_id
        md5_checksum = self._md5_checksum
        openml_api_key = ExecutionContext.Config.openml_api_key

        data_description_url = f'https://www.openml.org/api/v1/json/data/{data_id}'

        type_description_url = f'https://www.openml.org/api/v1/json/data/features/{data_id}'

        # The API key is optional; when present it is sent on both metadata requests.
        if openml_api_key is not None:
            data_description_url += f'?api_key={openml_api_key}'
            type_description_url += f'?api_key={openml_api_key}'

        descr = json.loads(''.join(
            HttpSource(data_description_url, '.json', None,
                       'descr').read()))["data_set_description"]

        if descr['status'] == 'deactivated':
            raise Exception(
                f"Openml {data_id} has been deactivated. This is often due to flags on the data."
            )

        types = json.loads(''.join(
            HttpSource(type_description_url, '.json', None,
                       'types').read()))["data_features"]["feature"]

        headers: List[str] = []      # column names, in feature order
        encoders: List[Encoder] = [] # per-column encoder chosen from OpenML's data_type
        ignored: List[bool] = []     # columns flagged is_ignore / is_row_identifier
        target: str = ""             # name of the column flagged is_target

        for tipe in types:

            headers.append(tipe['name'])
            ignored.append(tipe['is_ignore'] == 'true'
                           or tipe['is_row_identifier'] == 'true')

            if tipe['is_target'] == 'true':
                target = tipe['name']

            # Numeric columns pass through; nominal features get a (possibly singular)
            # one-hot encoding, a nominal target gets a full one-hot, everything else
            # is kept as a raw string.
            if tipe['data_type'] == 'numeric':
                encoders.append(NumericEncoder())
            elif tipe['data_type'] == 'nominal' and tipe[
                    'is_target'] == 'false':
                encoders.append(OneHotEncoder(singular_if_binary=True))
            elif tipe['data_type'] == 'nominal' and tipe['is_target'] == 'true':
                encoders.append(OneHotEncoder())
            else:
                encoders.append(StringEncoder())

        # A numeric target means this is a regression dataset; swap in a classification
        # target (from the OpenML task listing) and one-hot encode it instead.
        # NOTE(review): if no feature was flagged is_target, headers.index("") raises
        # ValueError here — presumably OpenML always flags a target; confirm.
        if isinstance(encoders[headers.index(target)], NumericEncoder):
            target = self._get_classification_target(data_id, openml_api_key)
            ignored[headers.index(target)] = False
            encoders[headers.index(target)] = OneHotEncoder()

        csv_url = f"http://www.openml.org/data/v1/get_csv/{descr['file_id']}"

        source = HttpSource(csv_url, ".csv", md5_checksum, f"openml {data_id}")
        reader = CsvReader()
        cleaner = LabeledCsvCleaner(target, headers, encoders, ignored, True)

        feature_rows, label_rows = Pipe.join(source, [reader, cleaner]).read()

        return list(feature_rows), list(label_rows)
Ejemplo n.º 10
0
    def test_exception_singleprocess(self):
        # An exception raised inside a filter should propagate out of a
        # single-process run rather than being swallowed.
        failing_pipe = Pipe.join(
            MemorySource(list(range(4))),
            [Pipe_Tests.ExceptionFilter()],
            MemorySink())

        with self.assertRaises(Exception):
            failing_pipe.run()