def __sanity_checks(X, y=None):
    """Validate that X is a 2D dataset with feature dimension 2 and that y,
    if given, matches X in length and encodes a binary problem."""
    if len(np.shape(X)) != 2:
        raise ValueError(
            f'Dataset X must be array of shape (n_datapoints, 2), was given {np.shape(X)}.'
        )
    n_datapoints, feature_dim = np.shape(X)
    if feature_dim != 2:
        raise ValueError(
            f'Dataset X must have feature dimension of 2, was given {feature_dim}.'
        )
    if y is not None:
        if len(y) != n_datapoints:
            raise ValueError(
                f'Targets y must be of same length as X. Expected length {n_datapoints}, was given {len(y)}.'
            )
        data_classes = np.unique(y)
        if len(data_classes) != 2:
            raise ValueError(
                'Currently only binary classification problems are supported!'
            )
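# Quick demonstration (sketch): how the checks above reject malformed input.
# Note: if __sanity_checks lives inside a class, its name is mangled and it
# would be called internally; the direct call here is for illustration only.
X_ok = np.array([[0.0, 1.0], [1.0, 0.0]])
y_ok = np.array([0, 1])
__sanity_checks(X_ok, y_ok)  # passes silently

try:
    __sanity_checks(np.zeros((4, 3)))  # feature dimension 3 instead of 2
except ValueError as err:
    print(err)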
def __init__(self, embedding, X, y):
    """
    Args:
        embedding (BaseEmbedding): Instance of BaseEmbedding
        X (np.array): Training dataset of shape (n_datapoints, feature_dim)
        y (np.array): Training labels of shape (n_datapoints,)
    """
    # check if the embedding object is of the correct type
    if not isinstance(embedding, BaseEmbedding):
        raise ValueError(
            'Embedding must be an instance that inherits from BaseEmbedding class.'
        )
    self.embedding = embedding

    # check if the dataset X is of the correct shape
    if len(np.shape(X)) != 2:
        raise ValueError(
            f'Dataset X must be array of shape (n_datapoints, feature_dim), was given {np.shape(X)}.'
        )
    self.n_datapoints, self.feature_dim = np.shape(X)

    # check if the dataset X and the training labels y have the same first dimension
    if self.n_datapoints != np.shape(y)[0]:
        raise ValueError(
            f'Dataset X and training labels y must have the same first dimension. '
            f'Got {self.n_datapoints} datapoints and {np.shape(y)[0]} labels.'
        )

    # check if the dataset feature dimension matches the embedding feature dimension
    if self.feature_dim != embedding.feature_dim:
        raise ValueError(
            f'Dataset dimension does not match embedding feature dimension! '
            f'Expected d={embedding.feature_dim}, was given d={self.feature_dim}.'
        )

    self.X = np.array(X, dtype=float, requires_grad=False)
    self.y = np.array(y, requires_grad=False)
    self.data_classes = np.unique(self.y)
    self.n_data_classes = len(self.data_classes)
    # use self.X here so boolean masking also works if X was passed as a list
    self.class_priors = np.array([
        len(self.X[self.y == data_class]) / self.n_datapoints
        for data_class in self.data_classes
    ])

    if self.n_data_classes > 2:
        raise NotImplementedError(
            'EmbeddingTrainer currently only supports 2-class classification datasets!'
        )

    self.X_1 = self.X[self.y == self.data_classes[0]]
    self.X_2 = self.X[self.y == self.data_classes[1]]
    self.opt = None
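# Example construction (sketch): the class name EmbeddingTrainer is taken from
# the NotImplementedError message above; `MyEmbedding` is a hypothetical
# subclass of BaseEmbedding with feature_dim == 2.
emb = MyEmbedding(feature_dim=2)  # hypothetical BaseEmbedding subclass
X = np.array([[0.1, 0.2], [0.3, 0.4]])
y = np.array([0, 1])
trainer = EmbeddingTrainer(emb, X, y)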
def get_cell_centers(cells):
    """Get average coordinates per cell

    Args:
        cells (ndarray<int>): Cells as computed by `get_cells`

    Returns:
        centers (dict): Map from cell labels to mean coordinates of cells
    """
    centers = {}
    for _id in np.unique(cells):
        wheres = np.where(cells == _id)
        centers[_id] = np.array(
            [np.mean(where.astype(float)) for where in wheres])
    return centers
def get_cells(vert, horz, iterations=None):
    """Given boolean boundary matrices, obtain cells via spreading iterations

    Args:
        vert (ndarray<bool>, shape=(m,n-1)): Vertical boundaries
        horz (ndarray<bool>, shape=(m-1,n)): Horizontal boundaries
        iterations=None (int): Number of spreading iterations. If None,
            defaults to max(m, n)

    Returns:
        mat (ndarray<int>, shape=(m,n)): Cells, given as labeling of matrix
            elements. The labels are contiguous.
    """
    num_rows = vert.shape[0]  # This is m in the docstring.
    num_cols = horz.shape[1]  # This is n in the docstring.
    if iterations is None:
        iterations = max(num_rows, num_cols)
    mat = np.arange(num_rows * num_cols, dtype=int).reshape(
        (num_rows, num_cols))
    for _ in range(iterations):
        for i in range(num_rows):
            for j in range(num_cols):
                nghbhood = [(i, j)]
                if j > 0 and not vert[i, j - 1]:
                    nghbhood.append((i, j - 1))
                if j < num_cols - 1 and not vert[i, j]:
                    nghbhood.append((i, j + 1))
                if i > 0 and not horz[i - 1, j]:
                    nghbhood.append((i - 1, j))
                if i < num_rows - 1 and not horz[i, j]:
                    nghbhood.append((i + 1, j))
                nghb_min = np.min([mat[_i, _j] for _i, _j in nghbhood])
                for _i, _j in nghbhood:
                    mat[_i, _j] = nghb_min
    # relabel so that cell labels are contiguous, starting at 0
    _map = {val: count for count, val in enumerate(np.unique(mat))}
    for i in range(num_rows):
        for j in range(num_cols):
            mat[i, j] = _map[mat[i, j]]
    return mat
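# Worked example (sketch, assuming `numpy` is imported as `np` and the two
# functions above are in scope): a 2x2 grid with a single vertical wall
# between its columns splits into two cells.
import numpy as np

vert = np.array([[True], [True]])  # shape (m, n-1): wall between the columns
horz = np.array([[False, False]])  # shape (m-1, n): no horizontal walls

cells = get_cells(vert, horz)
print(cells)
# [[0 1]
#  [0 1]]

print(get_cell_centers(cells))
# {0: array([0.5, 0. ]), 1: array([0.5, 1. ])}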
# visual check
plt.scatter(one_samples[0], one_samples[1])
plt.xlim(0, 28)
plt.ylim(0, 28)
plt.show()

# +
# split the two datasets, containing y=0 and y=1 respectively, into 5 datasets:
## distinct_zeros: samples unique to y=0
## distinct_ones: samples unique to y=1
## duplicates: samples that are both in y=0 and y=1
## not_ones: samples that are not in the "unique to y=1" set
## not_zeros: samples that are not in the "unique to y=0" set
zeros = np.unique(np.asarray(zero_samples).T, axis=0)
ones = np.unique(np.asarray(one_samples).T, axis=0)

distinct_zeros = []
distinct_ones = []
duplicates = []

# find unique zeros and duplicates; note the [0] after np.where, which
# extracts the index array from the tuple np.where returns (without it,
# the length check would always be truthy)
for sample in zeros:
    first_index = np.where(ones[:, 0] == sample[0])[0]
    if len(first_index) > 0:
        second_index = np.where(ones[first_index][:, 1] == sample[1])[0]
        if len(second_index) > 0:
            duplicates.append(sample)
        else:
            distinct_zeros.append(sample)
    else:
        distinct_zeros.append(sample)
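# A vectorized alternative to the membership tests above (sketch): since the
# rows of `zeros` and `ones` are already unique, plain Python set semantics on
# row tuples give the same splits. The *_alt names are illustrative.
zeros_set = {tuple(s) for s in zeros}
ones_set = {tuple(s) for s in ones}
duplicates_alt = sorted(zeros_set & ones_set)
distinct_zeros_alt = sorted(zeros_set - ones_set)
distinct_ones_alt = sorted(ones_set - zeros_set)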
def test_projector(self, device, tol, skip_if):
    """Test that a tensor product involving qml.Projector works correctly"""
    n_wires = 3
    dev = device(n_wires)

    if dev.shots is None:
        pytest.skip("Device is in analytic mode, cannot test sampling.")

    if "Projector" not in dev.observables:
        pytest.skip(
            "Skipped because device does not support the Projector observable."
        )

    skip_if(dev, {"supports_tensor_observables": False})

    theta = 1.432
    phi = 1.123
    varphi = -0.543

    @qml.qnode(dev)
    def circuit(basis_state):
        qml.RX(theta, wires=[0])
        qml.RX(phi, wires=[1])
        qml.RX(varphi, wires=[2])
        qml.CNOT(wires=[0, 1])
        qml.CNOT(wires=[1, 2])
        return qml.sample(
            qml.PauliZ(wires=[0]) @ qml.Projector(basis_state, wires=[1, 2]))

    res = circuit([0, 0])
    # res should only contain the eigenvalues of the projector matrix
    # tensor product Z, i.e. {-1, 0, 1}
    assert np.allclose(sorted(np.unique(res)), [-1, 0, 1], atol=tol(False))
    mean = np.mean(res)
    expected = ((np.cos(varphi / 2) * np.cos(phi / 2) * np.cos(theta / 2))**2
                - (np.cos(varphi / 2) * np.sin(phi / 2) * np.sin(theta / 2))**2)
    assert np.allclose(mean, expected, atol=tol(False))
    var = np.var(res)
    expected = (
        (np.cos(varphi / 2) * np.cos(phi / 2) * np.cos(theta / 2))**2
        + (np.cos(varphi / 2) * np.sin(phi / 2) * np.sin(theta / 2))**2
        - ((np.cos(varphi / 2) * np.cos(phi / 2) * np.cos(theta / 2))**2
           - (np.cos(varphi / 2) * np.sin(phi / 2) * np.sin(theta / 2))**2)**2)
    assert np.allclose(var, expected, atol=tol(False))

    res = circuit([0, 1])
    assert np.allclose(sorted(np.unique(res)), [-1, 0, 1], atol=tol(False))
    mean = np.mean(res)
    expected = ((np.sin(varphi / 2) * np.cos(phi / 2) * np.cos(theta / 2))**2
                - (np.sin(varphi / 2) * np.sin(phi / 2) * np.sin(theta / 2))**2)
    assert np.allclose(mean, expected, atol=tol(False))
    var = np.var(res)
    expected = (
        (np.sin(varphi / 2) * np.cos(phi / 2) * np.cos(theta / 2))**2
        + (np.sin(varphi / 2) * np.sin(phi / 2) * np.sin(theta / 2))**2
        - ((np.sin(varphi / 2) * np.cos(phi / 2) * np.cos(theta / 2))**2
           - (np.sin(varphi / 2) * np.sin(phi / 2) * np.sin(theta / 2))**2)**2)
    assert np.allclose(var, expected, atol=tol(False))

    res = circuit([1, 0])
    assert np.allclose(sorted(np.unique(res)), [-1, 0, 1], atol=tol(False))
    mean = np.mean(res)
    expected = ((np.sin(varphi / 2) * np.sin(phi / 2) * np.cos(theta / 2))**2
                - (np.sin(varphi / 2) * np.cos(phi / 2) * np.sin(theta / 2))**2)
    assert np.allclose(mean, expected, atol=tol(False))
    var = np.var(res)
    expected = (
        (np.sin(varphi / 2) * np.sin(phi / 2) * np.cos(theta / 2))**2
        + (np.sin(varphi / 2) * np.cos(phi / 2) * np.sin(theta / 2))**2
        - ((np.sin(varphi / 2) * np.sin(phi / 2) * np.cos(theta / 2))**2
           - (np.sin(varphi / 2) * np.cos(phi / 2) * np.sin(theta / 2))**2)**2)
    assert np.allclose(var, expected, atol=tol(False))

    res = circuit([1, 1])
    assert np.allclose(sorted(np.unique(res)), [-1, 0, 1], atol=tol(False))
    mean = np.mean(res)
    expected = ((np.cos(varphi / 2) * np.sin(phi / 2) * np.cos(theta / 2))**2
                - (np.cos(varphi / 2) * np.cos(phi / 2) * np.sin(theta / 2))**2)
    assert np.allclose(mean, expected, atol=tol(False))
    var = np.var(res)
    expected = (
        (np.cos(varphi / 2) * np.sin(phi / 2) * np.cos(theta / 2))**2
        + (np.cos(varphi / 2) * np.cos(phi / 2) * np.sin(theta / 2))**2
        - ((np.cos(varphi / 2) * np.sin(phi / 2) * np.cos(theta / 2))**2
           - (np.cos(varphi / 2) * np.cos(phi / 2) * np.sin(theta / 2))**2)**2)
    assert np.allclose(var, expected, atol=tol(False))
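# Why the sampled eigenvalues are {-1, 0, 1} (standalone numeric sketch): the
# projector P = |b><b| has eigenvalues {0, 1}, so Z (x) P has eigenvalues
# {-1, 0, +1}.
import numpy as np

Z = np.diag([1, -1])
b = np.zeros(4)
b[0] = 1.0          # the |00> basis state on wires [1, 2]
P = np.outer(b, b)  # projector onto |00>
print(np.unique(np.round(np.linalg.eigvalsh(np.kron(Z, P)), 12)))
# [-1.  0.  1.]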
def test_hermitian(self, device, tol, skip_if):
    """Test that a tensor product involving qml.Hermitian works correctly"""
    n_wires = 3
    dev = device(n_wires)

    if dev.shots is None:
        pytest.skip("Device is in analytic mode, cannot test sampling.")

    if "Hermitian" not in dev.observables:
        pytest.skip(
            "Skipped because device does not support the Hermitian observable."
        )

    skip_if(dev, {"supports_tensor_observables": False})

    theta = 0.432
    phi = 0.123
    varphi = -0.543

    A_ = 0.1 * np.array([
        [-6, 2 + 1j, -3, -5 + 2j],
        [2 - 1j, 0, 2 - 1j, -5 + 4j],
        [-3, 2 + 1j, 0, -4 + 3j],
        [-5 - 2j, -5 - 4j, -4 - 3j, -6],
    ])

    @qml.qnode(dev)
    def circuit():
        qml.RX(theta, wires=[0])
        qml.RX(phi, wires=[1])
        qml.RX(varphi, wires=[2])
        qml.CNOT(wires=[0, 1])
        qml.CNOT(wires=[1, 2])
        return qml.sample(
            qml.PauliZ(wires=[0]) @ qml.Hermitian(A_, wires=[1, 2]))

    res = circuit()

    # res should only contain the eigenvalues of
    # the hermitian matrix tensor product Z
    Z = np.diag([1, -1])
    eigvals = np.linalg.eigvalsh(np.kron(Z, A_))
    assert np.allclose(sorted(np.unique(res)), sorted(eigvals), atol=tol(False))

    mean = np.mean(res)
    expected = 0.1 * 0.5 * (
        -6 * np.cos(theta) * (np.cos(varphi) + 1)
        - 2 * np.sin(varphi) * (np.cos(theta) + np.sin(phi) - 2 * np.cos(phi))
        + 3 * np.cos(varphi) * np.sin(phi)
        + np.sin(phi))
    assert np.allclose(mean, expected, atol=tol(False))

    var = np.var(res)
    expected = 0.01 * (
        1057 - np.cos(2 * phi)
        + 12 * (27 + np.cos(2 * phi)) * np.cos(varphi)
        - 2 * np.cos(2 * varphi) * np.sin(phi) * (16 * np.cos(phi) + 21 * np.sin(phi))
        + 16 * np.sin(2 * phi)
        - 8 * (-17 + np.cos(2 * phi) + 2 * np.sin(2 * phi)) * np.sin(varphi)
        - 8 * np.cos(2 * theta) * (3 + 3 * np.cos(varphi) + np.sin(varphi))**2
        - 24 * np.cos(phi) * (np.cos(phi) + 2 * np.sin(phi)) * np.sin(2 * varphi)
        - 8 * np.cos(theta) * (
            4 * np.cos(phi) * (4 + 8 * np.cos(varphi) + np.cos(2 * varphi)
                               - (1 + 6 * np.cos(varphi)) * np.sin(varphi))
            + np.sin(phi) * (15 + 8 * np.cos(varphi) - 11 * np.cos(2 * varphi)
                             + 42 * np.sin(varphi) + 3 * np.sin(2 * varphi)))) / 16
    assert np.allclose(var, expected, atol=tol(False))
def run_tree_architecture_search(config: dict, dev_type: str):
    """The main workhorse for running the algorithm

    Args:
        config: Dictionary with configuration parameters for the algorithm. Possible keys are:

            - nqubits: Integer. The number of qubits in the circuit
            - min_tree_depth: Integer. Minimum circuit depth before we start pruning
            - max_tree_depth: Integer. Maximum circuit depth
            - prune_rate: Float. Fraction (strictly between 0 and 1) of nodes that we throw away when we prune
            - prune_step: Integer. How often do we prune
            - plot_trees: Boolean. Do we want to plot the tree at every depth?
            - data_set: String. Which dataset are we learning? Can be 'moons' or 'circles'
            - nsteps: Integer. The number of steps for training.
            - opt: qml.Optimizer. PennyLane optimizer
            - batch_size: Integer. Batch size for training.
            - n_samples: Integer. Number of samples that we want to take from the data set.
            - learning_rate: Float. Optimizer learning rate.
            - save_frequency: Integer. How often do we want to save the tree? Set to 0 for no saving.
            - save_path: String. Location to store the data.
            - embedding: String. Name of the data embedding; must be a key of string_to_embedding_mapping.
            - readout_layer: String. Either 'one_hot' or 'weighted_neuron'.
            - circuit_type: String. Either 'schuld' or 'hardware'; selects the allowed layer set.
            - save_timing: Boolean (optional). Whether to record timing information.
        dev_type: Either 'local' or 'remote'; selects the PennyLane device backend.

    Returns:
        None. Progress is printed; trees and weights are written to ``save_path`` and ``tree_weights.csv``.
    """
    # build in: circuit type
    # if circuit_type == 'schuld', use controlled rotation gates and a cycle layout for entangling layers
    # if circuit_type == 'hardware', use a minimal gate set and a path layout for entangling layers

    # Parse configuration parameters.
    NQUBITS = config['nqubits']
    NSAMPLES = config['n_samples']
    PATH = config['save_path']
    if dev_type == "local":
        dev = qml.device("default.qubit.autograd", wires=NQUBITS)
    elif dev_type == "remote":
        my_bucket = "amazon-braket-0fc49b964f85"  # the name of the bucket
        my_prefix = PATH.split('/')[1]  # the folder in the bucket shares the experiment name
        s3_folder = (my_bucket, my_prefix)
        device_arn = "arn:aws:braket:::device/quantum-simulator/amazon/sv1"
        dev = qml.device("braket.aws.qubit",
                         device_arn=device_arn,
                         wires=NQUBITS,
                         s3_destination_folder=s3_folder,
                         parallel=True,
                         max_parallel=10,
                         poll_timeout_seconds=30)
    else:
        raise ValueError(f"Unknown dev_type: {dev_type}, must be 'local' or 'remote'")

    MIN_TREE_DEPTH = config['min_tree_depth']
    MAX_TREE_DEPTH = config['max_tree_depth']
    SAVE_FREQUENCY = config['save_frequency']
    PRUNE_DEPTH_STEP = config['prune_step']  # every i-th depth is a prune step
    PRUNE_RATE = config['prune_rate']  # fraction of nodes to throw away at each layer
    PLOT_INTERMEDIATE_TREES = config['plot_trees']
    assert MIN_TREE_DEPTH < MAX_TREE_DEPTH, 'MIN_TREE_DEPTH must be smaller than MAX_TREE_DEPTH'
    assert 0.0 < PRUNE_RATE < 1.0, f'The PRUNE_RATE must be between 0 and 1, found {PRUNE_RATE}'

    if config['data_set'] == 'circles':
        X_train, y_train = datasets.make_circles(n_samples=NSAMPLES, factor=.5, noise=.05)
    elif config['data_set'] == 'moons':
        X_train, y_train = datasets.make_moons(n_samples=NSAMPLES, noise=.05)
    else:
        raise ValueError(f"Unknown data_set: {config['data_set']}, must be 'moons' or 'circles'")

    # rescale data to [-1, 1]
    X_train = 2.0 * (X_train - X_train.min()) / (X_train.max() - X_train.min()) - 1.0

    if config['readout_layer'] == 'one_hot':
        # one-hot encode labels
        y_train_ohe = np.zeros((y_train.size, y_train.max() + 1))
        y_train_ohe[np.arange(y_train.size), y_train] = 1
    elif config['readout_layer'] == 'weighted_neuron':
        y_train_ohe = y_train
    else:
        raise ValueError(f"Unknown readout_layer: {config['readout_layer']}")

    # automatically determine the number of classes
    NCLASSES = len(np.unique(y_train))
    assert NQUBITS >= NCLASSES, 'The number of qubits must be equal to or larger than the number of classes'

    save_timing = config.get('save_timing', False)
    if save_timing:
        print('saving timing info')
        import time

    # Create a directed graph.
    G = nx.DiGraph()
    # Add the root
    G.add_node("ROOT")
    G.nodes['ROOT']["W"] = 0.0
    # nx.set_node_attributes(G, {'ROOT': 0.0}, 'W')

    # Define allowed layers
    ct_ = config.get('circuit_type', None)
    if ct_ == 'schuld':
        possible_layers = ['ZZ', 'X', 'Y', 'Z']
        config['parameterized_gates'] = ['ZZ', 'X', 'Y', 'Z']
    elif ct_ == 'hardware':
        possible_layers = ['hw_CNOT', 'X', 'Y', 'Z']
        config['parameterized_gates'] = ['X', 'Y', 'Z']
    else:
        raise ValueError(f"Unknown circuit_type: {ct_}, must be 'schuld' or 'hardware'")
    possible_embeddings = [config['embedding'], ]
    assert all([l in string_to_layer_mapping.keys() for l in possible_layers]), \
        'No valid mapping from string to function found'
    assert all([l in string_to_embedding_mapping.keys() for l in possible_embeddings]), \
        'No valid mapping from string to function found'

    leaves_at_depth_d = dict(zip(range(MAX_TREE_DEPTH), [[] for _ in range(MAX_TREE_DEPTH)]))
    leaves_at_depth_d[0].append('ROOT')

    # Iteratively construct the tree, pruning at a set rate.
    ### PICKLE ALL STUFF FIRST
    pickled_data_for_MPI = [NQUBITS, NCLASSES, dev, config, X_train, y_train_ohe]
    with open(config['save_path'] + '/MPI_data.pickle', 'wb') as pdata:
        pickle.dump(pickled_data_for_MPI, pdata)

    for d in range(1, MAX_TREE_DEPTH):
        print(f"Depth = {d}")
        # Save trees. Note: this must be a boolean check, not the original
        # bitwise `&`/`~` expression, which saved on the wrong depths.
        if SAVE_FREQUENCY > 0 and d % SAVE_FREQUENCY == 0:
            nx.write_gpickle(G, config['save_path'] + f'/tree_depth_{d}.pickle')
        # Plot trees
        if PLOT_INTERMEDIATE_TREES:
            plot_tree(G)
        # If we have not yet passed MIN_TREE_DEPTH, don't prune.
        if d < MIN_TREE_DEPTH:
            # The first depth connects to the root.
            if d == 1:
                tree_grow_root(G, leaves_at_depth_d, possible_embeddings)
                # At the embedding level we don't need to train because there are no params.
                for v in leaves_at_depth_d[d]:
                    G.nodes[v]['W'] = 1.0
                    print('current graph: ', list(G.nodes(data=True)))
                    # nx.set_node_attributes(G, {v: 1.0}, 'W')
            else:
                tree_grow(G, leaves_at_depth_d, d, possible_layers)
                best_arch = max(nx.get_node_attributes(G, 'W').items(), key=operator.itemgetter(1))[0]
                print('Current best architecture: ', best_arch)
                print('max W:', G.nodes[best_arch]['W'])
                # For every leaf, create a circuit and run the optimization.
                train_all_leaves_parallel(G, leaves_at_depth_d, d, config)
        else:
            # Check that we are at the correct prune depth step.
            if not (d - MIN_TREE_DEPTH) % PRUNE_DEPTH_STEP:
                print('Prune Tree')
                best_arch = max(nx.get_node_attributes(G, 'W').items(), key=operator.itemgetter(1))[0]
                print('Current best architecture: ', best_arch)
                print('max W:', G.nodes[best_arch]['W'])
                # print(nx.get_node_attributes(G, 'W'))
                tree_prune(G, leaves_at_depth_d, d, PRUNE_RATE)
                print('Grow Pruned Tree')
                tree_grow(G, leaves_at_depth_d, d, possible_layers)
                # For every leaf, create a circuit and run the optimization.
                train_all_leaves_parallel(G, leaves_at_depth_d, d, config)
            else:
                print('Grow Tree')
                best_arch = max(nx.get_node_attributes(G, 'W').items(), key=operator.itemgetter(1))[0]
                print('Current best architecture: ', best_arch)
                print('max W:', G.nodes[best_arch]['W'])
                tree_grow(G, leaves_at_depth_d, d, possible_layers)
                train_all_leaves_parallel(G, leaves_at_depth_d, d, config)

    best_arch = max(nx.get_node_attributes(G, 'W').items(), key=operator.itemgetter(1))[0]
    print('architecture with max W: ', best_arch)
    print('max W:', G.nodes[best_arch]['W'])
    print('weights: ', G.nodes[best_arch]['weights'])

    import pandas as pd
    pd.DataFrame.from_dict(nx.get_node_attributes(G, 'W'), orient='index').to_csv('tree_weights.csv')
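# Example usage (sketch): a minimal configuration for a local run. The values
# below are illustrative assumptions, not defaults taken from this module;
# 'embedding' must name a key of string_to_embedding_mapping.
example_config = {
    'nqubits': 4,
    'min_tree_depth': 3,
    'max_tree_depth': 8,
    'prune_rate': 0.3,
    'prune_step': 2,
    'plot_trees': False,
    'data_set': 'moons',
    'nsteps': 50,
    'opt': qml.AdamOptimizer,
    'batch_size': 20,
    'n_samples': 200,
    'learning_rate': 0.1,
    'save_frequency': 0,
    'save_path': 'runs/moons_experiment',  # must contain a '/' for remote runs
    'embedding': 'angle',                  # assumed key; check string_to_embedding_mapping
    'readout_layer': 'one_hot',
    'circuit_type': 'schuld',
}
# run_tree_architecture_search(example_config, dev_type='local')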
# NOTE: the header of this function was missing from the source; the name,
# signature, and counter initialization below are assumed for illustration.
# `a` holds predicted values and `b` the targets; a prediction counts as
# correct when the signs agree.
def error_rate(a, b):
    total_correct = 0
    for i in range(len(a)):
        if a[i] * b[i] >= 0:
            total_correct += 1
        # else:
        #     print("incorrect")
    return (len(a) - total_correct) / len(a)


# ## Init and visualize

# take the dataset and parse it so that all data is in R
X, Y = create_data()

# PCA
X, Y = prep_data(X, Y, feature_dim)

# check if we still have unique data after dimensionality reduction
if len(X) != len(np.unique(X, axis=0)):
    print(len(X), len(np.unique(X, axis=0)))
    assert False, "DATA NOT UNIQUE, DUPLICATES DETECTED!"

# make sure we have balanced data
X_neg = []
X_pos = []
Y_neg = []
Y_pos = []
for i in range(len(Y)):
    if Y[i] < 0:
        X_neg.append(X[i])
        Y_neg.append(Y[i])
    else:
        X_pos.append(X[i])
        Y_pos.append(Y[i])
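# A vectorized equivalent of the sign-agreement loop in error_rate above
# (sketch): the error rate is the fraction of pairs whose product is negative.
def error_rate_vectorized(a, b):
    a, b = np.asarray(a), np.asarray(b)
    return float(np.mean(a * b < 0))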