def create(self, network_factory: bayesianpy.network.NetworkFactory):
    network = network_factory.create()
    # builder.create_cluster_variable(network, 5)

    if not dk.empty(self._continuous):
        for c_name in self._continuous.columns:
            builder.create_continuous_variable(network, c_name)

    if not dk.empty(self._discrete):
        for d_name in self._discrete.columns:
            builder.create_discrete_variable(network, self._discrete, d_name,
                                             blanks=self._blanks)

    network = bayesianpy.network.remove_single_state_nodes(network)
    return network

def create(self, network_factory):
    network = network_factory.create()

    # reuse an existing latent cluster node if the factory already created one
    cluster = builder.try_get_node(network, "Cluster")
    if cluster is None:
        cluster = builder.create_cluster_variable(
            network, self._latent_states,
            variable_name=self._latent_variable_name)

    if not dk.empty(self._continuous):
        for c_name in self._continuous.columns:
            self._logger.info("Pre-processing {} column".format(c_name))
            c = builder.create_continuous_variable(network, c_name)
            try:
                builder.create_link(network, cluster, c)
            except ValueError as e:
                self._logger.warning(e)

    if not dk.empty(self._discrete):
        for d_name in self._discrete.columns:
            # use pre-supplied states where available, otherwise derive them
            # from the distinct non-null values in the column
            if d_name in self._discrete_states:
                states = self._discrete_states[str(d_name)]
            else:
                states = dk.compute(
                    self._discrete[str(d_name)].dropna().unique()).tolist()

            try:
                c = builder.create_discrete_variable(
                    network, self._discrete, str(d_name), states)
                builder.create_link(network, cluster, c)
            except BaseException as e:
                self._logger.warning(e)

    return network

def create(self, network_factory):
    network = network_factory.create()

    if not dk.empty(self._continuous):
        for c_name in self._continuous.columns:
            c = builder.create_continuous_variable(network, c_name)

    if not dk.empty(self._discrete):
        for d_name in self._discrete.columns:
            if d_name in self._discrete_states:
                states = self._discrete_states[d_name]
            else:
                states = dk.compute(
                    self._discrete[d_name].dropna().unique()).tolist()

            try:
                c = builder.create_discrete_variable(
                    network, self._discrete, d_name, states)
            except BaseException as e:
                self._logger.warning(e)

    parent_node = builder.try_get_node(network, self._parent_node)
    if parent_node is None:
        raise ValueError("Parent node: {} not recognised".format(
            self._parent_node))

    # link every other node to the designated parent node
    for node in network.getNodes():
        if node == parent_node:
            continue
        builder.create_link(network, parent_node, node)

    return network

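# --- Usage sketch (an assumption, not from the source): how one of the create()
# templates above might be invoked. `template` stands for an instance of the
# enclosing class and is hypothetical; NetworkFactory is the same factory used
# in analyse() below.
def _example_build(template, logger):
    factory = bayesianpy.network.NetworkFactory(logger)
    return template.create(factory)
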
def analyse(self, df: pd.DataFrame, continuous_variable_names: List[str]):
    # sklearn.model_selection.KFold
    kf = KFold(n_splits=3, shuffle=self._shuffle)
    network_factory = bayesianpy.network.NetworkFactory(self._logger)
    variations = [1, 5, 10, 20, 30]
    results = {}

    with bayesianpy.data.DataSet(df, logger=self._logger) as dataset:
        for variable in continuous_variable_names:
            likelihoods = []
            for cluster_count in variations:
                weighted = []
                weights = []
                for k, (train_indexes, test_indexes) in enumerate(kf.split(df)):
                    x_train, x_test = train_indexes, test_indexes

                    # single continuous node attached to a latent cluster variable
                    nt = network_factory.create()
                    cluster = builder.create_cluster_variable(nt, cluster_count)
                    node = builder.create_continuous_variable(nt, variable)
                    builder.create_link(nt, cluster, node)

                    model = bayesianpy.model.NetworkModel(nt, self._logger)
                    try:
                        ll = model.train(dataset.subset(
                            x_train)).get_metrics()['loglikelihood']
                    except BaseException as e:
                        self._logger.warning(e)
                        continue

                    weighted.append(ll)
                    weights.append(len(x_train))

                # weight each fold's log-likelihood by its training-set size
                likelihoods.append(np.average(weighted, weights=weights))

            max_index = np.argmax(likelihoods)
            results[variable] = variations[max_index] > 5

    return results

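# --- Usage sketch (an assumption, not from the source): consuming the dict returned
# by analyse(). A True value means the best-scoring model needed more than 5 latent
# states, i.e. the column looks multi-modal. `selector` is a hypothetical instance of
# the enclosing class; the column names are taken from the example below.
def _example_flag_multimodal(selector, df):
    flags = selector.analyse(df, ['size', 'GraspPose'])
    return [name for name, is_multimodal in flags.items() if is_multimodal]
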
import logging

import bayesianpy.data
import bayesianpy.model
import bayesianpy.network
import bayesianpy.utils
from bayesianpy.network import Builder as builder

logger = logging.getLogger(__name__)

nt = bayesianpy.network.create_network()

# where df is your dataframe
task = builder.create_discrete_variable(nt, df, 'task')

size = builder.create_continuous_variable(nt, 'size')
grasp_pose = builder.create_continuous_variable(nt, 'GraspPose')
builder.create_link(nt, size, grasp_pose)
builder.create_link(nt, task, grasp_pose)

for v in ['fill level', 'object shape', 'side graspable']:
    va = builder.create_discrete_variable(nt, df, v)
    builder.create_link(nt, va, grasp_pose)
    builder.create_link(nt, task, va)

# write df to the data store
with bayesianpy.data.DataSet(df, bayesianpy.utils.get_path_to_parent_dir(__file__),
                             logger) as dataset:
    model = bayesianpy.model.NetworkModel(nt, logger)
    model.train(dataset)

    # to query the model multi-threaded
    results = model.batch_query(dataset, [bayesianpy.model.QueryModelStatistics()],
                                append_to_df=False)