Esempio n. 1
0
    def setup(self, network, inference_engine, query_options):
        """
        Register one query distribution for each of the two configured
        variables.

        ``self._variable_a_name`` and ``self._variable_b_name`` are looked up
        in ``network``. A ``CLGaussian`` is built for a continuous variable
        and a ``Table`` otherwise. Every distribution is then added to the
        inference engine's query distributions and the query options are
        switched to ``RETRACT_QUERY_EVIDENCE``.

        :param network: network the variable names are resolved against
        :param inference_engine: engine whose query distributions are extended
        :param query_options: options whose query evidence mode is set
        :raises ValueError: if a variable lookup yields a falsy result
        """
        distributions = []
        variables = []
        for variable_name in (self._variable_a_name, self._variable_b_name):
            variable = bayesianpy.network.get_variable(network, variable_name)

            # Validate the variable already fetched instead of looking it up
            # a second time.
            if not variable:
                raise ValueError(
                    "Variable {} does not exist in the network".format(
                        variable_name))

            # The type check is applied to the variable object, not its name.
            if bayesianpy.network.is_variable_continuous(variable):
                distributions.append(bayesServer().CLGaussian(variable))
            else:
                distributions.append(bayesServer().Table(variable))

            variables.append(variable)

        # Only touch the engine once every variable has been validated.
        for query in distributions:
            inference_engine.getQueryDistributions().add(
                bayesServerInference().QueryDistribution(query))

        query_options.setQueryEvidenceMode(
            bayesServerInference().QueryEvidenceMode.RETRACT_QUERY_EVIDENCE)

        self._distributions = distributions
        self._variables = variables
Esempio n. 2
0
    def setup(self, network, inference_engine, query_options):
        """
        Build the head/tail distribution and register it as a query.

        Every head and tail variable name is resolved against ``network`` and
        wrapped in a ``VariableContext`` flagged HEAD or TAIL. Discrete
        variables are recorded by name in ``self._discrete_variables``, and a
        discrete head variable sets ``self._is_discrete_head``. The
        distribution is a ``Table`` when ``self._is_discrete_head`` is truthy
        and a ``CLGaussian`` otherwise.

        :param network: network used to resolve names; also stored on self
        :param inference_engine: engine the query distribution is added to
        :param query_options: not used by this method
        :raises ValueError: if a non-discrete variable is listed as a tail
        """
        variable_contexts = []
        for name in self._head_variables + self._tail_variables:
            variable = bayesianpy.network.get_variable(network, name)
            in_head = name in self._head_variables

            if not bayesianpy.network.is_variable_discrete(variable):
                if name in self._tail_variables:
                    raise ValueError(
                        "Bayespy only supports continuous head variables (BayesServer is fine with it though!)"
                    )
            else:
                # A discrete head is allowed; it only changes which kind of
                # distribution is created below.
                if in_head:
                    self._is_discrete_head = True

                self._discrete_variables.append(variable.getName())

            if in_head:
                role = bayesServer().HeadTail.HEAD
            else:
                role = bayesServer().HeadTail.TAIL

            variable_contexts.append(
                bayesServer().VariableContext(variable, role))

        self._network = network
        if self._is_discrete_head:
            self._distribution = bayesServer().Table(variable_contexts)
        else:
            self._distribution = bayesServer().CLGaussian(variable_contexts)

        self._query_distribution = bayesServerInference().QueryDistribution(
            self._distribution)
        inference_engine.getQueryDistributions().add(self._query_distribution)
Esempio n. 3
0
    def create_data_reader_command(self):
        """
        Build the data reader command for the held data table.

        :return: a DataTableDataReaderCommand constructed from
            ``self._data_table``
        """
        return bayesServer().data.DataTableDataReaderCommand(self._data_table)
Esempio n. 4
0
    def create_data_reader_command(self):
        """
        Get the data reader

        Builds a ``DatabaseDataReaderCommand`` from ``self.get_connection()``
        and ``query``.

        :return: a DatabaseDataReaderCommand
        """

        # NOTE(review): `query` is not assigned anywhere in this method, so it
        # only resolves if a module-level `query` exists; otherwise this line
        # raises NameError. Confirm where the query text is meant to come from.
        data_reader_command = bayesServer().data.DatabaseDataReaderCommand(
            self.get_connection(), query)

        return data_reader_command
Esempio n. 5
0
 def update_network_layout(self, pos):
     """
     Reposition each node from the supplied layout, keeping its size.

     Coordinates are multiplied by ``self._multiplier``; a negative scaled
     coordinate is replaced with 0.0.

     :param pos: mapping of node key to an indexable whose items 0 and 1
         are the x and y coordinates
     """
     for node_key, coords in pos.items():
         node = self._jnetwork.getNodes().get(node_key)
         bounds = node.getBounds()
         height = bounds.getHeight()
         width = bounds.getWidth()

         left = coords[0] * self._multiplier
         top = coords[1] * self._multiplier

         # Clamp negative coordinates to zero.
         left = 0.0 if left < 0 else left
         top = 0.0 if top < 0 else top

         node.setBounds(bayesServer().Bounds(left, top, width, height))
Esempio n. 6
0
    def setup(self, network, inference_engine, query_options):
        """
        Register a log-likelihood query over the configured variables.

        Each name in ``self._variable_names`` is resolved against ``network``.
        A ``CLGaussian`` is built from the single variable when there is
        exactly one, and from the whole list otherwise. The query options are
        switched to ``RETRACT_QUERY_EVIDENCE`` and the query distribution is
        flagged to compute the log-likelihood before being added to the
        engine.

        :param network: network the variable names are resolved against
        :param inference_engine: engine the query distribution is added to
        :param query_options: options whose query evidence mode is set
        :raises ValueError: if no variable names are configured
        """
        resolved = []
        for name in self._variable_names:
            resolved.append(bayesianpy.network.get_variable(network, name))

        if not resolved:
            raise ValueError(
                "QueryLogLikelihood: Requires a non-empty list for creating a distribution"
            )

        # A lone variable is passed on its own rather than as a one-item list.
        target = resolved[0] if len(resolved) == 1 else resolved
        self._distribution = bayesServer().CLGaussian(target)

        query_options.setQueryEvidenceMode(
            bayesServerInference().QueryEvidenceMode.RETRACT_QUERY_EVIDENCE)

        query_distribution = bayesServerInference().QueryDistribution(
            self._distribution)
        query_distribution.setQueryLogLikelihood(True)
        self._query_distribution = query_distribution
        inference_engine.getQueryDistributions().add(query_distribution)
Esempio n. 7
0
    def train(self, dataset: bayesianpy.data.DataSet, seed:int=None, maximum_iterations:int=100,
                    maximum_concurrency:int=1)\
            -> TrainingResults:
        """
        Learn the parameters of ``self._jnetwork`` from the given dataset.

        :param dataset: supplies the data reader command, the reader options
            and the dataframe used to create variable references
        :param seed: seed for the learning options; left unset when None
        :param maximum_iterations: iteration limit; left unset when None
        :param maximum_concurrency: concurrency passed to the learning options
        :return: TrainingResults built from the network, a dict of metrics
            read from the learning output, and the logger
        """
        learner = bayesServerParams().ParameterLearning(
            self._jnetwork, self._inference_factory.get_inference_factory())

        options = bayesServerParams().ParameterLearningOptions()
        options.setMaximumConcurrency(
            jp.java.lang.Integer(maximum_concurrency))
        if seed is not None:
            options.setSeed(int(seed))
        if maximum_iterations is not None:
            options.setMaximumIterations(maximum_iterations)

        reader_command = dataset.create_data_reader_command().create(None)
        reader_options = dataset.get_reader_options().create()

        references = list(
            bayesianpy.network.create_variable_references(
                self._jnetwork, dataset.get_dataframe()))
        java_references = jp.java.util.Arrays.asList(references)

        evidence_command = bayesServer().data.DefaultEvidenceReaderCommand(
            reader_command, java_references, reader_options)

        self._logger.info("Training model...")
        outcome = learner.learn(evidence_command, options)
        self._logger.info("Finished training model")

        metrics = {
            'converged': outcome.getConverged(),
            'loglikelihood': outcome.getLogLikelihood().floatValue(),
            'iteration_count': outcome.getIterationCount(),
            'case_count': outcome.getCaseCount(),
            'weighted_case_count': outcome.getWeightedCaseCount(),
            'unweighted_case_count': outcome.getUnweightedCaseCount(),
            'bic': outcome.getBIC().floatValue()
        }

        return TrainingResults(self._jnetwork, metrics, self._logger)
Esempio n. 8
0
    def setup(self, network, inference_engine, query_options):
        """
        Register a ``Table`` query for the discrete target variable.

        The target is resolved against ``network`` and stored on
        ``self._variable``. The query options are switched to
        ``RETRACT_QUERY_EVIDENCE`` and the table is added to the engine as a
        query distribution.

        :param network: network the target variable name is resolved against
        :param inference_engine: engine the query distribution is added to
        :param query_options: options whose query evidence mode is set
        :raises ValueError: if the target variable is not discrete
        """
        self._variable = bayesianpy.network.get_variable(
            network, self._target_variable_name)

        # Only discrete targets are supported; reject anything else up front.
        if not bayesianpy.network.is_variable_discrete(self._variable):
            raise ValueError(
                "{} needs to be discrete in QueryMostLikelyState".format(
                    self._target_variable_name))

        table = bayesServer().Table(self._variable)

        query_options.setQueryEvidenceMode(
            bayesServerInference().QueryEvidenceMode.RETRACT_QUERY_EVIDENCE)
        query_distribution = bayesServerInference().QueryDistribution(table)

        self._distribution = table
        inference_engine.getQueryDistributions().add(query_distribution)
Esempio n. 9
0
    def setup(self, network, inference_engine, query_options):
        """
        Register a ``CLGaussian`` query for the configured variable.

        The variable is resolved against ``network`` and stored on
        ``self._variable``. When ``self._retract_evidence`` is truthy the
        query options are switched to ``RETRACT_QUERY_EVIDENCE``.

        :param network: network the variable name is resolved against
        :param inference_engine: engine the query distribution is added to
        :param query_options: options whose evidence mode may be set
        :raises ValueError: if the variable lookup yields a falsy result, or
            the variable is not continuous
        """
        self._variable = bayesianpy.network.get_variable(
            network, self._variable_name)

        # Validate the variable already fetched instead of looking it up a
        # second time.
        if not self._variable:
            raise ValueError(
                "Variable {} does not exist in the network".format(
                    self._variable_name))

        if not bayesianpy.network.is_variable_continuous(self._variable):
            raise ValueError("{} needs to be continuous.".format(
                self._variable_name))

        self._query = bayesServer().CLGaussian(self._variable)

        if self._retract_evidence:
            query_options.setQueryEvidenceMode(bayesServerInference(
            ).QueryEvidenceMode.RETRACT_QUERY_EVIDENCE)

        inference_engine.getQueryDistributions().add(
            bayesServerInference().QueryDistribution(self._query))
Esempio n. 10
0
 def create(self):
     """
     Build the reader options.

     :return: ReaderOptions constructed from ``self._index``, and also from
         ``self._weight`` when that is not None
     """
     if self._weight is None:
         return bayesServer().data.ReaderOptions(self._index)

     return bayesServer().data.ReaderOptions(self._index, self._weight)
Esempio n. 11
0
    def create(self, _: pd.DataFrame):
        """
        Build a database data reader command.

        The dataframe argument is ignored; the command is constructed from
        ``self._conn`` and ``self._query``.

        :return: a DatabaseDataReaderCommand
        """
        return bayesServer().data.DatabaseDataReaderCommand(
            self._conn, self._query)
Esempio n. 12
0
def _batch_query(
    df: pd.DataFrame,
    network_string: str,
    variable_references: List[str],
    queries: List[QueryFactory],
    create_data_reader_command: bayesianpy.reader.CreatableWithDf,
    create_data_reader_options: bayesianpy.reader.Creatable,
    logger: logging.Logger = None,
):
    """
    Run every query against each case read from the data source.

    A network is created from ``network_string``, an evidence reader is
    built over the data reader command, and each query instance is set up
    against one inference engine. For every case the reader yields, the
    engine is queried and the per-query results are merged into one dict
    together with the case id.

    :param df: dataframe used to derive the schema; passed to the data
        reader command factory when it is not empty
    :param network_string: serialized network handed to
        ``create_network_from_string``
    :param variable_references: forwarded to ``create_variable_references``
    :param queries: factories; ``create()`` is called on each to obtain the
        query instances
    :param create_data_reader_command: factory whose ``create`` returns the
        data reader command
    :param create_data_reader_options: factory whose ``create`` returns the
        reader options
    :param logger: logger to use; defaults to this module's logger
    :return: a DataFrame indexed by 'caseid', or an empty DataFrame when no
        case produced a result. When the outer handler catches an error
        nothing is returned explicitly in the code visible here.
    """

    if logger is None:
        logger = logging.getLogger(__name__)

    # Created outside the try block so the outer except handler can list them.
    query_instances = [query.create() for query in queries]

    try:
        bayesianpy.jni.attach(heap_space='1g')
        schema = bayesianpy.data.DataFrame.get_schema(df)

        # TODO: this is very nasty. Need to do this better.
        # DaskDataset (if using Dask) requires a non empty dataframe. Whereas Pandas and DB
        # datasets have this instantiated before being passed in.
        if not df.empty:
            drc = create_data_reader_command.create(df)
        else:
            drc = create_data_reader_command.create(None)

        # TODO: also not great, maybe need a 'callable' on dataReaderCommand?
        # A JProxy exposes executeReader through getCallable; anything else is
        # called directly.
        if isinstance(drc, jp.JProxy):
            data_reader = drc.getCallable('executeReader')()
        else:
            data_reader = drc.executeReader()

        network = bayesianpy.network.create_network_from_string(network_string)
        reader_options = create_data_reader_options.create()
        variable_refs = list(
            bayesianpy.network.create_variable_references(
                network, schema, variable_references=variable_references))

        if len(variable_refs) == 0:
            raise ValueError(
                "Could not match any variables in the supplied dataset with the network. Is it the same?"
            )

        reader = bayesServer().data.DefaultEvidenceReader(
            data_reader, jp.java.util.Arrays.asList(variable_refs),
            reader_options)

        inference_engine = bayesianpy.model.InferenceEngine(
            network).create_engine()
        query_options = bayesianpy.model.InferenceEngine.get_inference_factory(
        ).createQueryOptions()
        query_output = bayesianpy.model.InferenceEngine.get_inference_factory(
        ).createQueryOutput()

        # Every query registers its distributions on the shared engine.
        for query in query_instances:
            query.setup(network, inference_engine, query_options)

        ev = bayesianpy.model.Evidence(network, inference_engine).apply()

        results = []
        i = 0
        try:
            # Each successful read loads the next case into `ev`.
            while reader.read(ev, bayesServer().data.DefaultReadOptions(True)):
                result = {}

                try:
                    inference_engine.query(query_options, query_output)
                except BaseException as e:
                    # NOTE(review): the failure is only logged; the loop then
                    # still collects results from query_output for this case.
                    # BaseException also covers KeyboardInterrupt/SystemExit.
                    logger.error(e)
                    # inference_engine.getEvidence().clear()
                    # continue

                # Merge the output of every query into one row for this case.
                for query in query_instances:
                    result = {
                        **result,
                        **query.results(inference_engine, query_output)
                    }

                # Reset the evidence before the next case is read.
                ev.clear()
                result.update({
                    'caseid':
                    int(reader.getReadInfo().getCaseId().toString())
                })

                results.append(result)

                # Progress is logged on every 500th case.
                if i % 500 == 0:
                    logger.info("Queried case {}".format(i))

                i += 1
        except BaseException as e:
            # An error inside the loop ends iteration; the rows gathered so
            # far are still returned below.
            logger.error("Unexpected Error!")
            logger.error(e)
        finally:
            reader.close()
            # bayespy.jni.detach()
        if len(results) == 0:
            return pd.DataFrame()

        return pd.DataFrame(results).set_index('caseid')

    except BaseException as e:
        q = [str(query) for query in query_instances]

        # NOTE(review): r"\n " is a raw string, so the queries are joined with
        # a literal backslash-n rather than a newline -- confirm intent.
        # NOTE(review): the error is logged and not re-raised.
        logger.error("Unexpected Error: {}. Using queries: {}".format(
            e, r"\n ".join(q)))