# Shared imports for the snippets below. These functions come from different
# projects; the third-party imports they have in common are consolidated here,
# while project-internal helpers (e.g. DataDict, _LoopException, get_prime,
# FidelityStrategy, VariationalAutoencoder, Distribution, Batchwise, _dist,
# _prepare_images, as_tuple) are assumed to be provided by their source modules.
import typing as tp
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import scipy.sparse
import scipy.stats
from scipy import signal
from scipy.sparse.linalg import ArpackNoConvergence, eigs as eigens

Array = np.ndarray  # stand-in for the source projects' array type alias


def _make_crossover_sequence(num_sections: int, num_individuals: int,
                             rng: np.random.RandomState) -> tp.List[int]:
    assert num_individuals > 1
    indices = rng.permutation(num_individuals).tolist()
    while len(indices) < num_sections:
        new_indices = rng.permutation(num_individuals).tolist()
        # Avoid assigning the same individual to two adjacent sections.
        if new_indices[0] == indices[-1]:
            new_indices[0], new_indices[-1] = new_indices[-1], new_indices[0]
        indices.extend(new_indices)
    indices = indices[:num_sections]
    if 0 not in indices:
        indices[rng.randint(num_sections)] = 0  # always involve first element
    return indices  # type: ignore
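# Illustrative usage sketch (added; not in the original source): assign one of
# three parents to each of ten crossover sections. Parent 0 is guaranteed to
# appear by construction.
def _demo_make_crossover_sequence() -> None:
    rng = np.random.RandomState(0)
    seq = _make_crossover_sequence(num_sections=10, num_individuals=3, rng=rng)
    assert len(seq) == 10
    assert 0 in seq  # the first individual is always involved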
from rdkit import Chem  # needed by this snippet


def random_ordered_smiles(smiles: str, rng: np.random.RandomState) -> str:
    """Return a SMILES string for the same molecule with a randomly permuted atom ordering."""
    mol = Chem.MolFromSmiles(smiles)
    new_perm = rng.permutation(mol.GetNumAtoms()).tolist()
    new_mol = Chem.RenumberAtoms(mol, new_perm)
    return Chem.MolToSmiles(new_mol, canonical=False)
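# Illustrative usage sketch (added; assumes RDKit is installed): the randomized
# SMILES is generally non-canonical but decodes to the same molecule.
def _demo_random_ordered_smiles() -> None:
    rng = np.random.RandomState(42)
    randomized = random_ordered_smiles("CCO", rng)  # ethanol
    assert Chem.CanonSmiles(randomized) == Chem.CanonSmiles("CCO")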
def shuffle(self, random_state: np.random.RandomState = np.random) -> "DataDict":
    shuffled = DataDict()
    shuffle_idx = None
    for item in self:
        # Draw a single permutation on the first pass and reuse it for every
        # array, so rows stay aligned across keys.
        if shuffle_idx is None:
            shuffle_idx = random_state.permutation(self[item].shape[0])
        shuffled[item] = self[item][shuffle_idx]
    return shuffled
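# A minimal sketch (added; DataDict itself is project-internal) of the
# invariant above: reusing one permutation keeps rows paired across arrays.
def _demo_shared_permutation() -> None:
    rs = np.random.RandomState(7)
    idx = rs.permutation(5)
    x, y = np.arange(5), np.arange(5) * 10
    assert np.array_equal(y[idx], x[idx] * 10)  # pairing preserved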
def lhs(dimensions: int, size: int, state: np.random.RandomState,
        modified: bool = False) -> Tuple[Array, Array]:
    """Use Latin Hypercube Sampling to generate nodes and weights for integration."""
    # generate the samples
    samples = np.zeros((size, dimensions))
    for dimension in range(dimensions):
        samples[:, dimension] = state.permutation(
            np.arange(size) + state.uniform(size=1 if modified else size)
        ) / size

    # transform the samples and construct weights
    nodes = scipy.stats.norm().ppf(samples)
    weights = np.repeat(1 / size, size)
    return nodes, weights
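# Illustrative usage sketch (added): 100 Latin Hypercube integration nodes in
# two dimensions; the weights are uniform and sum to one.
def _demo_lhs() -> None:
    nodes, weights = lhs(dimensions=2, size=100, state=np.random.RandomState(0))
    assert nodes.shape == (100, 2)
    assert np.isclose(weights.sum(), 1.0)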
def _recursive_sample_from_dag_starting_at_node(rng: np.random.RandomState, dag, node_smi,
                                                smiles_seen, ancestor_smiles, connect_flag,
                                                max_depth):
    # Check whether it will create a loop. If so then raise an Exception and we'll back up...
    if node_smi in ancestor_smiles:
        raise _LoopException
    else:
        ancestor_smiles = ancestor_smiles | {node_smi}  # note that this creates a new set.

    # Work out whether it has a shared node with somewhere else in the DAG --
    # this is just an interesting statistic.
    connect_flag = connect_flag or node_smi in smiles_seen
    smiles_seen.add(node_smi)

    # Record how deep we have gone!
    max_depth += 1

    # If we are at a final node then we are done exploring further.
    in_edges = list(dag.in_edges((node_smi, )))
    if len(in_edges) == 0:
        tuple_tree = (node_smi, [])
    else:
        in_edge_possible_indices = rng.permutation(len(in_edges))
        for idx in in_edge_possible_indices:
            last_possible_idx_flag = idx == in_edge_possible_indices[-1]
            in_edge = in_edges[idx]
            reaction = in_edge[0]
            reactants = reaction[0]
            assert node_smi in reaction[1], "not in products...?"
            try:
                tuple_tree_down, max_depth, connect_flag = zip(*[
                    _recursive_sample_from_dag_starting_at_node(
                        rng, dag, n, smiles_seen, ancestor_smiles, connect_flag, max_depth)
                    for n in reactants
                ])
            except _LoopException as ex:
                # This reaction leads back into an ancestor; try the next
                # in-edge unless this was the last one.
                if last_possible_idx_flag:
                    raise ex
                else:
                    continue
            else:
                tuple_tree = (node_smi, list(tuple_tree_down))
                max_depth = np.max(max_depth)
                connect_flag = any(connect_flag)
                break  # it worked, so no need to explore other possibilities.
    return tuple_tree, max_depth, connect_flag
def _normal_random_recurrent_weights(
        hidden_layer_size: int, fan_in: int,
        random_state: np.random.RandomState) \
        -> Union[np.ndarray, scipy.sparse.csr_matrix]:
    """
    Return normally distributed random reservoir weights.

    Parameters
    ----------
    hidden_layer_size : int
    fan_in : int
        Determines how many features are mapped to one neuron.
    random_state : numpy.random.RandomState

    Returns
    -------
    normal_random_recurrent_weights : Union[np.ndarray, scipy.sparse.csr_matrix]
        Shape (hidden_layer_size, hidden_layer_size), scaled to approximately
        unit spectral radius.
    """
    nr_entries = int(hidden_layer_size * fan_in)
    weights_array = random_state.normal(loc=0., scale=1., size=nr_entries)

    if fan_in < hidden_layer_size:
        # Sparse reservoir: each neuron receives input from `fan_in` randomly
        # chosen neurons.
        indices = np.zeros(shape=nr_entries, dtype=int)
        indptr = np.arange(start=0, stop=(hidden_layer_size + 1) * fan_in, step=fan_in)
        for en in range(0, hidden_layer_size * fan_in, fan_in):
            indices[en:en + fan_in] = random_state.permutation(
                hidden_layer_size)[:fan_in].astype(int)
        recurrent_weights_init = scipy.sparse.csr_matrix(
            (weights_array, indices, indptr),
            shape=(hidden_layer_size, hidden_layer_size), dtype='float64')
    else:
        recurrent_weights_init = weights_array.reshape(
            (hidden_layer_size, hidden_layer_size))

    # Normalize to (approximately) unit spectral radius using the
    # largest-magnitude eigenvalues.
    try:
        we = eigens(recurrent_weights_init,
                    k=np.minimum(10, hidden_layer_size - 2),
                    which='LM',
                    return_eigenvectors=False,
                    v0=random_state.normal(loc=0., scale=1., size=hidden_layer_size))
    except ArpackNoConvergence as e:
        print("WARNING: No convergence! Returning possibly invalid values!!!")
        we = e.eigenvalues  # use the eigenvalues that did converge
    return recurrent_weights_init / np.amax(np.absolute(we))
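# Illustrative usage sketch (added): a sparse 100x100 reservoir with fan-in 5.
# With fan_in < hidden_layer_size the function returns a CSR matrix.
def _demo_recurrent_weights() -> None:
    w = _normal_random_recurrent_weights(
        hidden_layer_size=100, fan_in=5, random_state=np.random.RandomState(1))
    assert scipy.sparse.issparse(w)
    assert w.shape == (100, 100)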
def default_format_fn(
    sample: Dict[str, Any],
    input_prefix: str,
    output_prefix: str,
    choice_prefix: str,
    rng: np.random.RandomState,
    append_choices_to_input: bool = True,
) -> Dict[str, Any]:
    """Default format for tasks.

    Args:
      sample: Dictionary with an 'input' entry and a 'target' or 'target_scores'
        entry (or both), describing a single example.
      input_prefix: input prefix, prepended to all inputs.
      output_prefix: output prefix, prepended to outputs and choices (if present).
      choice_prefix: prefix prepended to each choice in a multiple-choice question.
      rng: random number generator
      append_choices_to_input: append choices to input for multiple choice.

    Returns:
      sample: Formatted dictionary, with 'choice' key added if present in input.

    Raises:
      Exception: If output not in choices.
    """

    def input_format(text):
        return input_prefix + text

    if "target_scores" in sample:
        choice_dic = sample["target_scores"]
        if append_choices_to_input:
            permuted_choices = rng.permutation(sorted(list(choice_dic.keys())))
            sample["input"] = (
                sample["input"] + choice_prefix + choice_prefix.join(permuted_choices)
            )
        if "target" not in sample:
            max_score = max(choice_dic.values())  # type: ignore
            # Target corresponds to maximum score.
            # If multiple choices have the same score, the first one is chosen.
            sample["target"] = [k for k, v in choice_dic.items() if v == max_score][0]  # type: ignore
        sample["choice"] = list(sample["target_scores"].keys())

    sample["input"] = input_format(sample["input"]) + output_prefix
    if not isinstance(sample["target"], list):
        sample["target"] = [sample["target"]]
    return sample
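# Illustrative usage sketch (added; field values are made up): format a
# multiple-choice sample, appending the shuffled choices to the input.
def _demo_default_format_fn() -> None:
    sample = {"input": "2 + 2 =", "target_scores": {"4": 1.0, "5": 0.0}}
    out = default_format_fn(sample, input_prefix="Q: ", output_prefix="\nA: ",
                            choice_prefix="\n  choice: ",
                            rng=np.random.RandomState(0))
    assert out["target"] == ["4"]
    assert out["choice"] == ["4", "5"]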
def shuffle_participants(self, data: np.ndarray, random_state: np.random.RandomState) -> np.ndarray:
    shuffled_data = data.copy()
    for tid in (0, 1):  # two teams of five participants each
        for cont_member_id, rand_member_id in enumerate(random_state.permutation(5)):
            if cont_member_id == rand_member_id:
                continue
            cont_pid = tid * 5 + cont_member_id
            rand_pid = tid * 5 + rand_member_id
            cont_key_part = "participants.{pid:d}.".format(pid=cont_pid)
            cont_ban_key_part = "teams.{tid:d}.bans.{mid:d}.".format(tid=tid, mid=cont_member_id)
            rand_key_part = "participants.{pid:d}.".format(pid=rand_pid)
            rand_ban_key_part = "teams.{tid:d}.bans.{mid:d}.".format(tid=tid, mid=rand_member_id)
            cont_cids = np.where([col.name.startswith(cont_key_part) or
                                  col.name.startswith(cont_ban_key_part)
                                  for col in self.specs])[0]
            rand_cids = np.where([col.name.startswith(rand_key_part) or
                                  col.name.startswith(rand_ban_key_part)
                                  for col in self.specs])[0]
            # Move all columns belonging to the randomly drawn member into the
            # slot of the current member.
            shuffled_data[:, cont_cids] = data[:, rand_cids]
    return shuffled_data
def halton(dimensions: int, size: int, start: int, scramble: bool,
           state: np.random.RandomState) -> Tuple[Array, Array]:
    """Generate nodes and weights for integration according to the Halton sequence."""
    # generate Halton sequences
    sequences = np.zeros((size, dimensions))
    for dimension in range(dimensions):
        base = get_prime(dimension)
        factor = 1 / base
        indices = np.arange(start, start + size)
        while 1 - factor < 1:
            indices, remainders = np.divmod(indices, base)
            if scramble:
                remainders = state.permutation(base)[remainders]
            sequences[:, dimension] += factor * remainders
            factor /= base

    # transform the sequences and construct weights
    nodes = scipy.stats.norm().ppf(sequences)
    weights = np.repeat(1 / size, size)
    return nodes, weights
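# Illustrative usage sketch (added; assumes the source module's get_prime
# helper, returning the d-th prime, is available): 8 scrambled Halton draws
# in two dimensions, mapped through the standard normal inverse CDF.
def _demo_halton() -> None:
    nodes, weights = halton(dimensions=2, size=8, start=1, scramble=True,
                            state=np.random.RandomState(0))
    assert nodes.shape == (8, 2)
    assert np.isclose(weights.sum(), 1.0)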
def diagonal_potential_conv(d_1: int, d_2: int, rng: np.random.RandomState) -> np.ndarray:
    kernel = np.array([[0.5, 1, 0.5]])
    factor_potential = rng.randint(4, 10, size=(d_1, d_2)) * 1.0
    dim = np.min([d_1, d_2])
    identity = np.eye(dim)
    if rng.normal(size=1) > 1:
        identity = np.flip(identity, axis=0)
    # Pad the (possibly flipped) identity with zeros to the requested shape.
    if d_2 > d_1:
        diagonal = np.concatenate([identity, np.zeros((dim, d_2 - dim))], axis=1)
    elif d_2 < d_1:
        diagonal = np.concatenate([identity, np.zeros((d_1 - dim, dim))], axis=0)
    else:
        diagonal = identity
    diagonal = rng.permutation(diagonal)  # shuffles the rows
    diagonal_dominance = np.exp(factor_potential) + diagonal * 50000
    diagonal_dominance /= np.mean(diagonal_dominance)
    return signal.convolve2d(diagonal_dominance, kernel, mode="same")
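# Illustrative usage sketch (added): the convolution uses mode="same", so the
# returned potential keeps the requested shape.
def _demo_diagonal_potential_conv() -> None:
    pot = diagonal_potential_conv(4, 6, np.random.RandomState(0))
    assert pot.shape == (4, 6)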
def _uniform_random_input_weights(
        n_features_in: int, hidden_layer_size: int, fan_in: int,
        random_state: np.random.RandomState) \
        -> Union[np.ndarray, scipy.sparse.csr_matrix]:
    """
    Return uniform random input weights in range [-1, 1].

    Parameters
    ----------
    n_features_in : int
    hidden_layer_size : int
    fan_in : int
        Determines how many features are mapped to one neuron.
    random_state : numpy.random.RandomState

    Returns
    -------
    uniform_random_input_weights : Union[np.ndarray, scipy.sparse.csr_matrix]
        The randomly initialized input weights,
        shape (n_features_in, hidden_layer_size).
    """
    nr_entries = int(n_features_in * fan_in)
    weights_array = random_state.uniform(low=-1., high=1., size=nr_entries)

    if fan_in < hidden_layer_size:
        # Sparse input weights: each feature drives `fan_in` random neurons.
        indices = np.zeros(shape=nr_entries, dtype=int)
        indptr = np.arange(start=0, stop=(n_features_in + 1) * fan_in, step=fan_in)
        for en in range(0, n_features_in * fan_in, fan_in):
            indices[en:en + fan_in] = random_state.permutation(
                hidden_layer_size)[:fan_in].astype(int)
        return scipy.sparse.csr_matrix(
            (weights_array, indices, indptr),
            shape=(n_features_in, hidden_layer_size), dtype='float64')
    else:
        return weights_array.reshape((n_features_in, hidden_layer_size))
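# Illustrative usage sketch (added): map 10 input features into a reservoir of
# 50 neurons, each feature driving only fan_in=3 randomly chosen neurons.
def _demo_input_weights() -> None:
    w_in = _uniform_random_input_weights(
        n_features_in=10, hidden_layer_size=50, fan_in=3,
        random_state=np.random.RandomState(0))
    assert w_in.shape == (10, 50)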
def _build_portfolio(
    y_test: np.ndarray,
    y_valid: Optional[np.ndarray],
    runtimes_matrix: np.ndarray,
    config_ids: List[int],
    config_id_to_idx: Dict[int, int],
    config_to_budget_to_idx: Dict[str, Dict[float, int]],
    task_id_to_idx: Dict[int, int],
    portfolio_size: int,
    rng: np.random.RandomState,
    losses: Optional[Dict[int, float]],
    fidelity_strategy: FidelityStrategy,
) -> Tuple[List[str], np.ndarray, List[Dict[float, int]]]:
    shuffled_config_ids = rng.permutation(list(config_ids))
    if y_valid is None:
        y_valid = y_test

    portfolio = []
    budget_to_idx = []
    cache_2 = None
    old_performances = np.ones((len(y_test), ))
    if losses:
        for task_id in losses:
            task_idx = task_id_to_idx[task_id]
            old_performances[task_idx] = losses[task_id]

    for i in range(portfolio_size):
        scores = []
        caches_2 = []
        # Define these here to have the code more similar to
        # _build_portfolio_with_cutoffs
        cutoffs = np.ones((i + 1, )) * np.inf
        runtimes = runtimes_matrix.copy()
        runtimes[np.isfinite(runtimes)] = 0.0
        max_runtime = np.inf
        for j, config_id in enumerate(shuffled_config_ids):
            if config_id in portfolio:
                scores.append(np.inf)
                caches_2.append(None)
            else:
                portfolio.append(config_id)
                # # for-loop-based version, written in pure Python. Comment this in to check
                # # the vectorized version in Cython, which is way more involved
                # scores_j = []
                # for idx in range(len(y_valid)):
                #     score = fidelity_strategy.play(
                #         y_valid=y_valid[idx],
                #         y_test=y_test[idx],
                #         runtimes=runtimes[idx],
                #         configurations=portfolio,
                #         config_id_to_idx=config_id_to_idx,
                #         config_to_budget_to_idx=config_to_budget_to_idx,
                #         cutoffs=cutoffs,
                #         max_runtime=max_runtime,
                #     )[1]
                #     scores_j.append(score)

                # cython + vectorized
                _, test_wise_scores_2, cache_j2 = \
                    fidelity_strategy.play_cythonized_vectorized(
                        y_valid=y_valid,
                        y_test=y_test,
                        runtimes=runtimes,
                        configurations=portfolio,
                        config_id_to_idx=config_id_to_idx,
                        cutoffs=cutoffs,
                        config_to_budget_to_idx=config_to_budget_to_idx,
                        max_runtime=float(max_runtime),
                        cache=cache_2,
                    )
                # np.testing.assert_array_almost_equal(
                #     scores_j, test_wise_scores_2,
                # )
                caches_2.append(cache_j2)
                del portfolio[-1]
                # One can interchange scores_j and test_wise_scores here
                test_wise_scores = np.minimum(old_performances, test_wise_scores_2)
                scores.append(np.mean(test_wise_scores))

        argmin = int(np.argmin(scores))  # type: int
        config_id = shuffled_config_ids[argmin]
        print(i, scores[argmin], config_id)
        portfolio.append(config_id)
        budget_to_idx.append(config_to_budget_to_idx[config_id_to_idx[config_id]])
        if len(caches_2) == len(scores):
            cache_2 = caches_2[argmin]
        # If converged!
        if np.min(scores) <= 0:
            break

    return portfolio, np.array([np.inf] * len(portfolio), dtype=np.float64), budget_to_idx
def _build_portfolio_with_cutoffs(
    y_test: np.ndarray,
    y_valid: Optional[np.ndarray],
    runtimes_matrix: np.ndarray,
    config_ids: List[int],
    config_id_to_idx: Dict[int, int],
    config_to_budget_to_idx: Dict[str, Dict[float, int]],
    task_id_to_idx: Dict[int, int],
    portfolio_size: int,
    rng: np.random.RandomState,
    max_runtime: int,
    losses: Optional[Dict[int, float]],
    fidelity_strategy: FidelityStrategy,
) -> Tuple[List[str], np.ndarray, List[Dict[float, int]]]:
    shuffled_config_ids = rng.permutation(list(config_ids))
    if y_valid is None:
        y_valid = y_test

    old_performances = np.ones((len(y_test), ))
    if losses:
        for task_id in losses:
            task_idx = task_id_to_idx[task_id]
            old_performances[task_idx] = losses[task_id]

    # Candidate cutoffs: a geometric grid up to max_runtime / 2, plus
    # max_runtime / 2, max_runtime and the largest observed runtime.
    nanmax = np.nanmax(runtimes_matrix) + 1
    factor = 2
    ts = ([
        int(2 ** (exponent / factor))
        for exponent in range(0, factor * int(np.ceil(np.log2(max_runtime))))
        if 2 ** (exponent / factor) <= (max_runtime / 2)
    ] + [max_runtime / 2, max_runtime])
    if nanmax > 0 and nanmax < max_runtime:
        ts.append(nanmax)
    if portfolio_size == 1:
        ts += [max_runtime]
    ts = np.unique(ts)

    scores_t = []
    portfolios_t = []
    budget_to_idx_t = []
    cutoffs_t = []
    n_iter_above_max_observed_runtime = 0

    for t_idx, t in enumerate(ts):
        cache_2 = None
        if (t_idx + 1 < len(ts)
                and (ts[t_idx] * portfolio_size < max_runtime)
                and ((ts[t_idx + 1]) * portfolio_size < max_runtime)):
            print('Skipping cutoff', t)
            continue
        if n_iter_above_max_observed_runtime > 0:
            print('Skipping cutoff', t)
            continue
        if t > np.nanmax(runtimes_matrix):
            print('Cutoff %f larger than nanmax %f of runtimes matrix'
                  % (t, float(np.nanmax(runtimes_matrix))))
            n_iter_above_max_observed_runtime += 1

        portfolio = []
        budget_to_idx = []
        trajectory = []
        cutoffs = None
        for i in range(portfolio_size):
            scores = []
            caches_2 = []
            cutoffs = np.array([t] * (i + 1), dtype=np.float64)
            for j, config_id in enumerate(shuffled_config_ids):
                if config_id in portfolio:
                    scores.append(np.inf)
                    caches_2.append(None)
                else:
                    portfolio.append(config_id)
                    # # For-loop based version
                    # scores_per_task = []
                    # for idx in range(len(y_valid)):
                    #     score = fidelity_strategy.play(
                    #         y_valid=y_valid[idx],
                    #         y_test=y_test[idx],
                    #         runtimes=runtimes_matrix[idx],
                    #         configurations=portfolio,
                    #         config_id_to_idx=config_id_to_idx,
                    #         config_to_budget_to_idx=config_to_budget_to_idx,
                    #         cutoffs=cutoffs,
                    #         max_runtime=np.float64(max_runtime),
                    #     )[1]
                    #     scores_per_task.append(score)

                    # Cythonized version
                    _, test_wise_scores_2, cache_j2 = (
                        fidelity_strategy.play_cythonized_vectorized(
                            y_valid=y_valid,
                            y_test=y_test,
                            runtimes=runtimes_matrix,
                            configurations=portfolio,
                            config_id_to_idx=config_id_to_idx,
                            config_to_budget_to_idx=config_to_budget_to_idx,
                            cutoffs=cutoffs,
                            max_runtime=np.float64(max_runtime),
                            cache=cache_2,
                        ))
                    # scores_per_task = np.array(scores_per_task, dtype=test_wise_scores_2.dtype)
                    # try:
                    #     np.testing.assert_array_almost_equal(
                    #         scores_per_task, test_wise_scores_2,
                    #         err_msg=str(
                    #             (
                    #                 t,
                    #                 portfolio,
                    #                 list(scores_per_task),
                    #                 list(test_wise_scores_2),
                    #             )
                    #         )
                    #     )
                    # except AssertionError:
                    #     print(scores_per_task.dtype, test_wise_scores_2.dtype)
                    #     for s1, s2 in zip(scores_per_task, test_wise_scores_2):
                    #         print(s1, s2, type(s1), type(s2), abs(s1 - s2))
                    #     raise

                    # One can interchange scores_j and test_wise_scores here
                    test_wise_scores = np.minimum(old_performances, test_wise_scores_2)
                    # print(j, t, np.mean(test_wise_scores), test_wise_scores)
                    del portfolio[-1]
                    scores.append(test_wise_scores.mean())
                    caches_2.append(cache_j2)

            argmin = int(np.argmin(scores))  # type: int
            trajectory.append(scores[argmin])
            config_id = shuffled_config_ids[argmin]
            print(i, scores[argmin], cutoffs[0])
            portfolio.append(config_id)
            budget_to_idx.append(config_to_budget_to_idx[config_id_to_idx[config_id]])
            if len(caches_2) == len(scores):
                cache_2 = caches_2[argmin]
            # If converged!
            if np.min(scores) <= 0:
                break

        scores_t.append(trajectory[-1])
        portfolios_t.append(portfolio)
        budget_to_idx_t.append(budget_to_idx)
        if cutoffs is None:
            raise ValueError('Cutoffs array should not be None!')
        cutoffs_t.append(cutoffs)

    argmin = int(np.argmin(scores_t))
    print('Selecting cutoff %f, score %f' % (cutoffs_t[argmin][0], scores_t[argmin]))
    print({cutoffs_t[i][0]: scores_t[i] for i in range(len(cutoffs_t))})
    portfolio = portfolios_t[argmin]
    budget_to_idx = budget_to_idx_t[argmin]
    cutoffs = np.array(cutoffs_t[argmin], dtype=np.float64)
    print(portfolio, cutoffs)
    return portfolio, cutoffs, budget_to_idx
import ecole  # needed by this snippet


def generate_instance(
    n_rows: int,
    n_cols: int,
    density: float,
    max_coef: int,
    rng: np.random.RandomState,
):
    """Generates an instance of a set covering problem.

    This method generates an instance of a set covering problem based on the
    specified parameters and returns it as an ecole model.

    Algorithm described in:
        E. Balas and A. Ho, Set covering algorithms using cutting planes,
        heuristics, and subgradient optimization: a computational study,
        Mathematical Programming, 12 (1980), 37-60.

    Parameters
    ----------
    n_rows:
        The number of rows.
    n_cols:
        The number of columns.
    density:
        The density of the constraint matrix. The value must be in the range (0, 1].
    max_coef:
        Maximum objective coefficient. The value must be >= 1.

    Returns
    -------
    model: an ecole model of a set cover instance.
    """
    nnzrs = int(n_rows * n_cols * density)
    assert nnzrs >= n_rows  # at least 1 col per row
    assert nnzrs >= 2 * n_cols  # at least 2 rows per col

    indices = np.empty((nnzrs,), dtype=int)

    # sample column indices
    indices[: 2 * n_cols] = np.arange(2 * n_cols) % n_cols  # force at least 2 rows per col
    indices[2 * n_cols:] = (
        rng.choice(n_cols * (n_rows - 2), size=nnzrs - (2 * n_cols), replace=False) % n_cols
    )  # remaining column indices are random

    # count the resulting number of rows, for each column
    _, col_n_rows = np.unique(indices, return_counts=True)

    # for each column, sample row indices
    i = 0
    indptr = [0]
    indices[:n_rows] = rng.permutation(n_rows)  # pre-fill to force at least 1 column per row
    for n in col_n_rows:
        if i + n <= n_rows:
            # column is already filled, nothing to do
            pass
        elif i >= n_rows:
            # column is empty, fill with random rows
            indices[i: i + n] = rng.choice(n_rows, size=n, replace=False)
        elif i + n > n_rows:
            # column is partially filled, complete with random rows among remaining ones
            remaining_rows = np.setdiff1d(
                np.arange(n_rows), indices[i:n_rows], assume_unique=True
            )
            indices[n_rows: i + n] = rng.choice(
                remaining_rows, size=i + n - n_rows, replace=False
            )
        i += n
        indptr.append(i)

    # sample objective coefficients
    c = rng.randint(max_coef, size=n_cols) + 1

    # convert csc indices/indptr to csr indices/indptr
    indptr_csr = np.zeros((n_rows + 1), dtype=int)
    indptr_counter = np.zeros((n_rows + 1), dtype=int)
    indices_csr = np.zeros(len(indices), dtype=int)

    # compute indptr for csr
    for i in range(len(indices)):
        indptr_csr[indices[i] + 1] += 1
    indptr_csr = np.cumsum(indptr_csr)

    # compute indices for csr
    for col in range(n_cols):
        for row in indices[indptr[col]: indptr[col + 1]]:
            indices_csr[indptr_csr[row] + indptr_counter[row]] = col
            indptr_counter[row] += 1

    model = ecole.scip.Model.prob_basic()
    pyscipopt_model = model.as_pyscipopt()
    pyscipopt_model.setMinimize()

    # add variables
    for j in range(n_cols):
        pyscipopt_model.addVar(name=f"x{j + 1}", vtype="B", obj=c[j])

    # add constraints: every row must be covered by at least one column
    pyscipopt_model_vars = pyscipopt_model.getVars()
    for i in range(n_rows):
        cons_lhs = 0
        consvars = [
            pyscipopt_model_vars[j] for j in indices_csr[indptr_csr[i]: indptr_csr[i + 1]]
        ]
        for var in consvars:
            cons_lhs += var
        pyscipopt_model.addCons(cons_lhs >= 1)

    return model
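# Illustrative usage sketch (added; assumes ecole and PySCIPOpt are installed):
# generate a small set covering instance. The chosen density must satisfy the
# two assertions at the top of generate_instance.
def _demo_generate_instance() -> None:
    model = generate_instance(n_rows=100, n_cols=200, density=0.05,
                              max_coef=100, rng=np.random.RandomState(0))
    assert len(model.as_pyscipopt().getVars()) == 200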
import tensorflow as tf  # needed by this snippet
from tqdm import tqdm


def _call(
    vae: VariationalAutoencoder,
    *,
    ds: tf.data.Dataset,
    rand: np.random.RandomState,
    take_count: int = -1,
    n_images: int = 36,
    verbose: bool = True
) -> Tuple[float, float, np.ndarray, np.ndarray, np.ndarray, Distribution,
           List[Distribution], List[Distribution]]:
    """
    Returns
    -------
    llk_x, llk_y, x_org, x_rec, y_true, y_pred, all_qz, all_pz
    """
    ds = ds.take(take_count)
    prog = tqdm(ds, disable=not verbose)
    llk_x, llk_y = [], []
    y_true, y_pred = [], []
    x_org, x_rec = [], []
    Q_zs = []
    P_zs = []
    for x, y in prog:
        P, Q = vae(x, training=False)
        P = as_tuple(P)
        Q, Q_prior = vae.get_latents(return_prior=True)
        Q = as_tuple(Q)
        Q_prior = as_tuple(Q_prior)
        y_true.append(y)
        px = P[0]
        # semi-supervised
        if len(P) > 1:
            py = P[-1]
            y_pred.append(_dist(py))
            if y.shape[1] == py.event_shape[0]:
                llk_y.append(py.log_prob(y))
        Q_zs.append(_dist(Q))
        P_zs.append(_dist(Q_prior))
        llk_x.append(px.log_prob(x))
        # for the reconstruction: keep a small random subset of batches
        if rand.uniform() < 0.005 or len(x_org) < 2:
            x_org.append(x)
            x_rec.append(px.mean())
    # log-likelihood
    llk_x = tf.reduce_mean(tf.concat(llk_x, axis=0)).numpy()
    llk_y = tf.reduce_mean(tf.concat(llk_y, axis=0)).numpy() \
        if len(llk_y) > 0 else -np.inf
    # latents
    n_latents = len(Q_zs[0])
    all_qz = [Batchwise([z[i] for z in Q_zs]) for i in range(n_latents)]
    all_pz = [
        Batchwise([z[i] for z in P_zs])
        if len(P_zs[0][i].batch_shape) > 0 else P_zs[0][i]
        for i in range(n_latents)
    ]
    # reconstruction: keep a random sample of n_images images
    x_org = tf.concat(x_org, axis=0).numpy()
    x_rec = tf.concat(x_rec, axis=0).numpy()
    ids = rand.permutation(x_org.shape[0])
    x_org = x_org[ids][:n_images]
    x_rec = x_rec[ids][:n_images]
    x_rec = _prepare_images(x_rec, normalize=True)
    x_org = _prepare_images(x_org, normalize=False)
    # labels
    y_true = tf.concat(y_true, axis=0).numpy()
    if len(y_pred) > 0:
        y_pred = Batchwise(y_pred, name='LabelsTest')
    return llk_x, llk_y, x_org, x_rec, y_true, y_pred, all_qz, all_pz
def subsample(total_items, max_items, rs: np.random.RandomState):
    perms = rs.permutation(total_items)
    if total_items < max_items:
        return perms
    return perms[:max_items]
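# Illustrative usage sketch (added): draw at most three distinct indices from a
# pool of ten; fewer than max_items are returned only when the pool is smaller.
def _demo_subsample() -> None:
    picked = subsample(total_items=10, max_items=3, rs=np.random.RandomState(0))
    assert len(picked) == 3
    assert len(set(picked.tolist())) == 3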