def setup(self, network, inference_engine, query_options):
    """Register query distributions for variables A and B with the engine.

    Builds a CLGaussian for continuous variables and a Table for discrete
    ones, adds one QueryDistribution per variable, and switches the query
    options to evidence retraction.

    :param network: the Bayes Server network to query
    :param inference_engine: engine whose query-distribution list is extended
    :param query_options: options object; evidence mode set to RETRACT
    :raises ValueError: if either configured variable is absent from the network
    """
    distributions = []
    variables = []
    for variable_name in [self._variable_a_name, self._variable_b_name]:
        variable = bayesianpy.network.get_variable(network, variable_name)
        # Check the result of the single lookup rather than querying the
        # network a second time (the original called get_variable twice).
        if not variable:
            raise ValueError(
                "Variable {} does not exist in the network".format(
                    variable_name))

        # BUG FIX: the original passed the variable *name* (a string) to
        # is_variable_continuous; sibling setup methods in this file pass
        # the variable object, so pass the object here.
        if bayesianpy.network.is_variable_continuous(variable):
            distributions.append(bayesServer().CLGaussian(variable))
        else:
            distributions.append(bayesServer().Table(variable))

        variables.append(variable)

    for query in distributions:
        inference_engine.getQueryDistributions().add(
            bayesServerInference().QueryDistribution(query))

    query_options.setQueryEvidenceMode(
        bayesServerInference().QueryEvidenceMode.RETRACT_QUERY_EVIDENCE)

    self._distributions = distributions
    self._variables = variables
def setup(self, network, inference_engine, query_options):
    """Build the head/tail variable contexts for a conditional query and
    register the resulting distribution with the inference engine.

    Discrete variables are accepted only in the head position (and flag the
    query as having a discrete head); a continuous variable appearing in the
    tail list is rejected. With a discrete head a Table distribution is
    used, otherwise a conditional linear Gaussian (CLGaussian).
    """
    contexts = []
    for h in self._head_variables + self._tail_variables:
        v = bayesianpy.network.get_variable(network, h)
        if bayesianpy.network.is_variable_discrete(v):
            if h in self._head_variables:
                #raise ValueError("Bayespy only supports discrete tail variables (BayesServer is fine with it though!)")
                self._is_discrete_head = True
                # Track discrete head variables by name for later result handling.
                self._discrete_variables.append(v.getName())
        else:
            if h in self._tail_variables:
                raise ValueError(
                    "Bayespy only supports continuous head variables (BayesServer is fine with it though!)"
                )

        # Wrap each variable with its HEAD/TAIL role for the distribution context.
        contexts.append(bayesServer().VariableContext(
            v, bayesServer().HeadTail.HEAD
            if h in self._head_variables else bayesServer().HeadTail.TAIL))

    self._network = network

    # Discrete head -> Table; otherwise conditional linear Gaussian.
    if self._is_discrete_head:
        self._distribution = bayesServer().Table(contexts)
    else:
        self._distribution = bayesServer().CLGaussian(contexts)

    self._query_distribution = bayesServerInference().QueryDistribution(
        self._distribution)
    inference_engine.getQueryDistributions().add(self._query_distribution)
def create_data_reader_command(self):
    """
    Create the data reader command backed by the in-memory data table.

    :return: a DataTableDataReaderCommand wrapping ``self._data_table``
    """
    data_reader_command = bayesServer().data.DataTableDataReaderCommand(
        self._data_table)

    return data_reader_command
def create_data_reader_command(self):
    """
    Create the data reader command that reads rows from the configured database.

    :return: a DatabaseDataReaderCommand built from the connection and query
    """
    # BUG FIX: the original referenced a bare name `query`, which is not
    # defined in this scope (NameError at call time). Use the instance's
    # stored query, matching the DatabaseDataReaderCommand construction used
    # elsewhere in this file (self._conn/self._query) — confirm the attribute
    # name against this class's __init__.
    data_reader_command = bayesServer().data.DatabaseDataReaderCommand(
        self.get_connection(), self._query)

    return data_reader_command
def update_network_layout(self, pos):
    """Apply scaled (x, y) positions from *pos* to the network's nodes.

    Coordinates are multiplied by the configured multiplier and clamped at
    zero; each node keeps its existing width and height.
    """
    scale = self._multiplier
    nodes = self._jnetwork.getNodes()
    for node_name, coords in pos.items():
        node = nodes.get(node_name)
        bounds = node.getBounds()
        scaled_x = coords[0] * scale
        scaled_y = coords[1] * scale
        if scaled_x < 0:
            scaled_x = 0.0
        if scaled_y < 0:
            scaled_y = 0.0
        node.setBounds(bayesServer().Bounds(
            scaled_x, scaled_y, bounds.getWidth(), bounds.getHeight()))
def setup(self, network, inference_engine, query_options):
    """Configure a log-likelihood query over the configured variables.

    Builds a CLGaussian over the variable set (single variable passed
    directly, multiple passed as a list), enables log-likelihood on the
    query distribution, and registers it with the inference engine.

    :raises ValueError: if the variable-name list resolves to nothing
    """
    variables = [
        bayesianpy.network.get_variable(network, name)
        for name in self._variable_names
    ]

    if not variables:
        raise ValueError(
            "QueryLogLikelihood: Requires a non-empty list for creating a distribution"
        )

    # A single variable is passed directly; multiple go in as a list.
    target = variables[0] if len(variables) == 1 else variables
    self._distribution = bayesServer().CLGaussian(target)

    query_options.setQueryEvidenceMode(
        bayesServerInference().QueryEvidenceMode.RETRACT_QUERY_EVIDENCE)

    query = bayesServerInference().QueryDistribution(self._distribution)
    query.setQueryLogLikelihood(True)
    self._query_distribution = query
    inference_engine.getQueryDistributions().add(query)
def train(self, dataset: bayesianpy.data.DataSet, seed: int = None,
          maximum_iterations: int = 100, maximum_concurrency: int = 1)\
        -> TrainingResults:
    """ Train a model on data provided in the constructor.

    :param dataset: dataset wrapper supplying the reader command, reader
        options and the dataframe used to build variable references
    :param seed: optional seed for parameter learning (cast to int when set)
    :param maximum_iterations: iteration cap passed through when not None
    :param maximum_concurrency: worker-thread cap for Bayes Server learning
    :return: a TrainingResults carrying convergence and fit statistics
    """
    learning = bayesServerParams().ParameterLearning(
        self._jnetwork, self._inference_factory.get_inference_factory())
    learning_options = bayesServerParams().ParameterLearningOptions()
    # Box as java.lang.Integer for the Java setter.
    learning_options.setMaximumConcurrency(
        jp.java.lang.Integer(maximum_concurrency))

    if seed is not None:
        learning_options.setSeed(int(seed))

    if maximum_iterations is not None:
        learning_options.setMaximumIterations(maximum_iterations)

    # No dataframe is handed to the command here; the dataset's command is
    # expected to be self-contained (DB/table backed).
    data_reader_command = dataset.create_data_reader_command().create(None)
    reader_options = dataset.get_reader_options().create()

    variable_references = list(
        bayesianpy.network.create_variable_references(
            self._jnetwork, dataset.get_dataframe()))

    evidence_reader_command = bayesServer(
    ).data.DefaultEvidenceReaderCommand(
        data_reader_command,
        jp.java.util.Arrays.asList(variable_references), reader_options)

    self._logger.info("Training model...")
    result = learning.learn(evidence_reader_command, learning_options)
    self._logger.info("Finished training model")

    # floatValue() unboxes the Java Double results into Python floats.
    return TrainingResults(
        self._jnetwork, {
            'converged': result.getConverged(),
            'loglikelihood': result.getLogLikelihood().floatValue(),
            'iteration_count': result.getIterationCount(),
            'case_count': result.getCaseCount(),
            'weighted_case_count': result.getWeightedCaseCount(),
            'unweighted_case_count': result.getUnweightedCaseCount(),
            'bic': result.getBIC().floatValue()
        }, self._logger)
def setup(self, network, inference_engine, query_options):
    """Register a most-likely-state query for the target variable.

    The target must be discrete (a Table distribution is built for it);
    evidence mode is switched to retraction.

    :raises ValueError: if the target variable is not discrete
    """
    self._variable = bayesianpy.network.get_variable(
        network, self._target_variable_name)

    # Guard clause: only discrete variables can back a Table query.
    if not bayesianpy.network.is_variable_discrete(self._variable):
        raise ValueError(
            "{} needs to be discrete in QueryMostLikelyState".format(
                self._target_variable_name))

    table = bayesServer().Table(self._variable)

    query_options.setQueryEvidenceMode(
        bayesServerInference().QueryEvidenceMode.RETRACT_QUERY_EVIDENCE)

    self._distribution = table
    inference_engine.getQueryDistributions().add(
        bayesServerInference().QueryDistribution(table))
def setup(self, network, inference_engine, query_options):
    """Register a CLGaussian query for a single continuous variable.

    Optionally switches the query options to evidence retraction when
    ``self._retract_evidence`` is set.

    :raises ValueError: if the variable is missing or not continuous
    """
    self._variable = bayesianpy.network.get_variable(
        network, self._variable_name)

    # Check the result of the single lookup rather than querying the
    # network a second time (the original called get_variable twice).
    if not self._variable:
        raise ValueError(
            "Variable {} does not exist in the network".format(
                self._variable_name))

    if not bayesianpy.network.is_variable_continuous(self._variable):
        raise ValueError("{} needs to be continuous.".format(
            self._variable_name))

    self._query = bayesServer().CLGaussian(self._variable)

    if self._retract_evidence:
        query_options.setQueryEvidenceMode(bayesServerInference(
        ).QueryEvidenceMode.RETRACT_QUERY_EVIDENCE)

    inference_engine.getQueryDistributions().add(
        bayesServerInference().QueryDistribution(self._query))
def create(self):
    """Build Bayes Server ReaderOptions, adding the weight column when one
    is configured."""
    if self._weight is None:
        return bayesServer().data.ReaderOptions(self._index)
    return bayesServer().data.ReaderOptions(self._index, self._weight)
def create(self, _: pd.DataFrame):
    """Create a DatabaseDataReaderCommand from the stored connection and query.

    The dataframe argument is ignored; it exists only to satisfy the
    CreatableWithDf interface.
    """
    return bayesServer().data.DatabaseDataReaderCommand(
        self._conn, self._query)
def _batch_query(
        df: pd.DataFrame,
        network_string: str,
        variable_references: List[str],
        queries: List[QueryFactory],
        create_data_reader_command: bayesianpy.reader.CreatableWithDf,
        create_data_reader_options: bayesianpy.reader.Creatable,
        logger: logging.Logger = None,
):
    """Run the supplied queries against every case read from the data source.

    Attaches the current thread to the JVM, builds an evidence reader over
    the data, sets up each query on a fresh inference engine, then iterates
    case-by-case applying evidence and collecting each query's results.

    :return: a DataFrame of per-case results indexed by 'caseid' (empty
        DataFrame if no cases were read). NOTE(review): on an outer
        exception the error is logged and the function implicitly returns
        None — callers should be prepared for that.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    # Instantiate the concrete query objects from their factories.
    query_instances = [query.create() for query in queries]
    try:
        bayesianpy.jni.attach(heap_space='1g')
        schema = bayesianpy.data.DataFrame.get_schema(df)

        # TODO: this is very nasty. Need to do this better.
        # DaskDataset (if using Dask) requires a non empty dataframe. Whereas Pandas and DB
        # datasets have this instantiated before being passed in.
        if not df.empty:
            drc = create_data_reader_command.create(df)
        else:
            drc = create_data_reader_command.create(None)

        # TODO: also not great, maybe need a 'callable' on dataReaderCommand?
        # JPype proxies expose methods via getCallable rather than directly.
        if isinstance(drc, jp.JProxy):
            data_reader = drc.getCallable('executeReader')()
        else:
            data_reader = drc.executeReader()

        network = bayesianpy.network.create_network_from_string(network_string)
        reader_options = create_data_reader_options.create()

        variable_refs = list(
            bayesianpy.network.create_variable_references(
                network, schema, variable_references=variable_references))

        if len(variable_refs) == 0:
            raise ValueError(
                "Could not match any variables in the supplied dataset with the network. Is it the same?"
            )

        reader = bayesServer().data.DefaultEvidenceReader(
            data_reader, jp.java.util.Arrays.asList(variable_refs),
            reader_options)

        inference_engine = bayesianpy.model.InferenceEngine(
            network).create_engine()
        query_options = bayesianpy.model.InferenceEngine.get_inference_factory(
        ).createQueryOptions()
        query_output = bayesianpy.model.InferenceEngine.get_inference_factory(
        ).createQueryOutput()

        # Let each query register its distributions/options on the engine.
        for query in query_instances:
            query.setup(network, inference_engine, query_options)

        ev = bayesianpy.model.Evidence(network, inference_engine).apply()

        results = []
        i = 0
        try:
            # Read one case at a time, applying its evidence to `ev`.
            while reader.read(ev, bayesServer().data.DefaultReadOptions(True)):
                result = {}
                try:
                    inference_engine.query(query_options, query_output)
                except BaseException as e:
                    # Query failures are logged but the case still collects
                    # whatever results the queries can produce.
                    logger.error(e)
                    # inference_engine.getEvidence().clear()
                    # continue

                for query in query_instances:
                    result = {
                        **result,
                        **query.results(inference_engine, query_output)
                    }

                ev.clear()
                result.update({
                    'caseid':
                    int(reader.getReadInfo().getCaseId().toString())
                })
                results.append(result)

                # Progress heartbeat every 500 cases.
                if i % 500 == 0:
                    logger.info("Queried case {}".format(i))

                i += 1
        except BaseException as e:
            logger.error("Unexpected Error!")
            logger.error(e)
        finally:
            reader.close()
            # bayespy.jni.detach()

        if len(results) == 0:
            return pd.DataFrame()

        return pd.DataFrame(results).set_index('caseid')
    except BaseException as e:
        q = [str(query) for query in query_instances]
        logger.error("Unexpected Error: {}. Using queries: {}".format(
            e, r"\n ".join(q)))