def perturb(graphs, neighborhood_estimator, feasibility_estimator=None, execute_concurrently=False):
    """Generate neighbors for each input graph and return them as one flat list.

    A neighbor-generating callable is built once from the estimators and
    applied to every graph, either concurrently or sequentially.
    """
    neighbor_fn = _perturb(
        neighborhood_estimator=neighborhood_estimator,
        feasibility_estimator=feasibility_estimator)
    if execute_concurrently:
        per_graph_neighbors = simple_parallel_map(neighbor_fn, graphs)
    else:
        per_graph_neighbors = [neighbor_fn(graph) for graph in graphs]
    # Flatten: one neighbor list per input graph -> a single list of graphs.
    return [neighbor
            for neighbors in per_graph_neighbors
            for neighbor in neighbors]
def _predict(self, graphs): preds = [] if self.execute_concurrently is False: preds = [ estimator.predict(graphs) for estimator in self.estimators ] else: _ensemble_score_estimator_predict_ = ensemble_score_estimator_predict( estimators=self.estimators[:], graphs=graphs[:]) results = simple_parallel_map(_ensemble_score_estimator_predict_, range(len(self.estimators))) preds = [pred for id, pred in sorted(results, key=lambda x: x[0])] print('p--%s' % (preds)) return preds
def fit(self, graphs, targets, unlabeled_graphs=None):
    """Fit every estimator in the ensemble on the same data.

    Parameters
    ----------
    graphs : list
        Training graphs.
    targets : list
        Target values aligned with ``graphs``.
    unlabeled_graphs : list or None
        Optional extra graphs for semi-supervised fitting.

    Returns
    -------
    self
    """
    if not self.execute_concurrently:
        self.estimators = [
            estimator.fit(graphs, targets)
            for estimator in self.estimators
        ]
        return self
    # Guard the optional argument: the previous unconditional
    # unlabeled_graphs[:] raised TypeError when the default None was used.
    fit_one = ensemble_score_estimator_fit(
        estimators=self.estimators[:],
        graphs=graphs[:],
        targets=targets[:],
        unlabeled_graphs=None if unlabeled_graphs is None else unlabeled_graphs[:])
    results = simple_parallel_map(fit_one, range(len(self.estimators)))
    # Workers may complete out of order; restore estimator order by index.
    # (Debug print of the estimator list removed.)
    self.estimators = [
        estimator for idx, estimator in sorted(results, key=lambda x: x[0])
    ]
    return self
def select_iterated_neighborhoods(
        proposed_graphs, neighborhood_fitting_graphs,
        neighborhood_fitting_scores, graphs, neighborhood_estimators,
        feasibility_estimator, score_estimator, n_steps_driven_by_estimator,
        sample_size_to_perturb, n_queries_to_oracle_per_iter,
        parallelization_strategy):
    """Iteratively expand and down-sample candidate graphs using the estimators.

    Fits each neighborhood estimator on the fitting data, then for
    ``n_steps_driven_by_estimator`` rounds materializes the neighborhoods
    of the current proposals, deduplicates them (also against ``graphs``),
    scores them with ``score_estimator``, and samples a subset to carry
    into the next round.  Returns ``(proposed_graphs, proposed_scores)``
    for oracle evaluation.
    """
    # 'neighborhood_wise' parallelizes across estimators; any other
    # strategy runs the estimators sequentially here.
    if parallelization_strategy == 'neighborhood_wise':
        execute_concurrently = True
    else:
        execute_concurrently = False
    for n_estimator, neighborhood_estimator in enumerate(neighborhood_estimators):
        neighborhood_estimator.fit(neighborhood_fitting_graphs,
                                   neighborhood_fitting_scores)
    for step in range(n_steps_driven_by_estimator):
        all_proposed_graphs = []
        # Closure over a snapshot of the current proposals; invoked with an
        # estimator index to materialize that estimator's neighborhood.
        _materialize_iterated_neighborhood_ = materialize_iterated_neighborhood(
            neighborhood_estimators=neighborhood_estimators,
            next_proposed_graphs=proposed_graphs[:],
            graphs=graphs,
            feasibility_estimator=feasibility_estimator,
            score_estimator=score_estimator,
            step=step,
            n_steps_driven_by_estimator=n_steps_driven_by_estimator,
            sample_size_to_perturb=sample_size_to_perturb,
            n_queries_to_oracle_per_iter=n_queries_to_oracle_per_iter)
        if execute_concurrently:
            list_of_graphs = simple_parallel_map(
                _materialize_iterated_neighborhood_,
                range(len(neighborhood_estimators)))
        else:
            list_of_graphs = [_materialize_iterated_neighborhood_(i)
                              for i in range(len(neighborhood_estimators))]
        # Concatenate the per-estimator neighborhoods.
        for gs in list_of_graphs:
            all_proposed_graphs += gs
        # Deduplicate within the batch and against the already-known graphs.
        proposed_graphs = remove_duplicates(all_proposed_graphs)
        proposed_graphs = remove_duplicates_in_set(proposed_graphs, graphs)
        predicted_scores = score_estimator.predict(proposed_graphs)
        # Intermediate steps keep enough graphs to keep perturbing; the
        # final step keeps only as many as the oracle budget allows.
        if step < n_steps_driven_by_estimator - 1:
            sample_size = sample_size_to_perturb
        else:
            sample_size = n_queries_to_oracle_per_iter
        proposed_graphs, proposed_scores = sample(
            proposed_graphs,
            predicted_scores, sample_size, greedy_frac=0.5)
    if n_queries_to_oracle_per_iter < len(proposed_graphs):
        logger.info('sampling %d out of %d non redundant graphs out of %d graphs generated for oracle evaluation' % (
            n_queries_to_oracle_per_iter, len(proposed_graphs), len(all_proposed_graphs)))
        # NOTE(review): predicted_scores was computed for the pre-sample
        # graph list, while proposed_graphs has already been down-sampled
        # above — the two may no longer be aligned here. Confirm whether
        # sample() tolerates a longer score list or whether scores should
        # be re-predicted for the current proposed_graphs.
        proposed_graphs, proposed_scores = sample(
            proposed_graphs, predicted_scores,
            n_queries_to_oracle_per_iter, greedy_frac=0.5)
    else:
        # keep all proposed graphs
        # NOTE(review): this overwrites the scores returned by the last
        # sample() call with the full-length predicted_scores; lengths may
        # differ from proposed_graphs — verify intended behavior.
        proposed_scores = predicted_scores
    # at this point we have proposed_graphs, proposed_scores:
    # for each graph we have the 'parent' and the 'type' and the score
    # we can formulate a learning task where starting from the parent graph
    # we have to predict in multiclass the type that has max score in any of the offsprings
    # so we have to collect all predictions relative to the same (hashed) parent graph
    # and select the argmax as the class to predict
    # the prediction can be used to allocate resources: expand only the k types that are
    # predicted to be yielding the best future improvements with k depending on the budget
    # the type predictor will be passed to the 'perturb' function and will mute the generation
    # for types predicted to under perform
    # NOTE: code should allow empty neighbor_graphs
    # Note: allow for no prediction at all when all types need to be generated
    return proposed_graphs, proposed_scores