def take_from_dist_2(size: int):
    """ Updated (drifted) process used in the non-stationary learning experiment """
    data = np.array([])
    while len(data) < size:
        dif = size - len(data)
        take_size = (dif + 10) * 2
        new_data = np.random.lognormal(mean=2, sigma=1, size=take_size)
        new_data, _ = discretize_and_truncate(new_data, bounds, num_qubits)
        if len(new_data) > dif:
            new_data = new_data[-dif:]
        data = np.append(data, new_data)
    return data
def take_from_dist_1(size: int):
    """ Used as an underlying process """
    data = np.array([])
    while len(data) < size:
        dif = size - len(data)
        take_size = (dif + 10) * 2
        new_data = np.random.lognormal(mean=1, sigma=1, size=take_size)
        new_data, _ = discretize_and_truncate(new_data, bounds, num_qubits)
        if len(new_data) > dif:
            new_data = new_data[-dif:]
        data = np.append(data, new_data)
    return data
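# A minimal usage sketch for the two samplers above (a sketch, assuming the
# module-level `bounds` and `num_qubits` that the driver script below defines,
# and the discretize_and_truncate helper from qiskit.aqua.utils):
#
#     stationary = take_from_dist_1(1000)  # underlying process, lognormal(mean=1)
#     drifted = take_from_dist_2(1000)     # updated process, lognormal(mean=2)
#
# Both helpers oversample by (dif + 10) * 2 because discretize_and_truncate
# drops samples that fall outside `bounds`, so the loop repeats until exactly
# `size` in-bounds values have been collected.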
def __init__(self, data: np.ndarray, bounds: Optional[np.ndarray] = None,
             num_qubits: Optional[np.ndarray] = None, batch_size: int = 500,
             num_epochs: int = 3000, seed: int = 7,
             discriminator: Optional[DiscriminativeNetwork] = None,
             generator: Optional[GenerativeNetwork] = None,
             tol_rel_ent: Optional[float] = None,
             snapshot_dir: Optional[str] = None) -> None:
    """
    Args:
        data: training data of dimension k
        bounds: k min/max data values [[min_0,max_0],...,[min_k-1,max_k-1]];
            if univariate data: [min_0,max_0]
        num_qubits: k numbers of qubits that determine the representation
            resolution, i.e. n qubits enable the representation of 2**n values
            [num_qubits_0,..., num_qubits_k-1]
        batch_size: batch size, has a min. value of 1.
        num_epochs: number of training epochs
        seed: random number seed
        discriminator: discriminates between real and fake data samples
        generator: generates 'fake' data samples
        tol_rel_ent: tolerance level for relative entropy. If the training
            achieves relative entropy equal to or lower than this tolerance,
            it finishes.
        snapshot_dir: path or None; if a path is given, store a csv file with
            parameters in that directory
    Raises:
        AquaError: invalid input
    """
    validate_min('batch_size', batch_size, 1)
    super().__init__()
    if data is None:
        raise AquaError('Training data not given.')
    self._data = np.array(data)
    if bounds is None:
        bounds_min = np.percentile(self._data, 5, axis=0)
        bounds_max = np.percentile(self._data, 95, axis=0)
        bounds = []
        for i, _ in enumerate(bounds_min):
            bounds.append([bounds_min[i], bounds_max[i]])
    if np.ndim(data) > 1:
        if len(bounds) != (len(num_qubits) or len(data[0])):
            raise AquaError('Dimensions of the data, the length of the data bounds '
                            'and the numbers of qubits per '
                            'dimension are incompatible.')
    else:
        if (np.ndim(bounds) or len(num_qubits)) != 1:
            raise AquaError('Dimensions of the data, the length of the data bounds '
                            'and the numbers of qubits per '
                            'dimension are incompatible.')
    self._bounds = np.array(bounds)
    self._num_qubits = num_qubits
    # pylint: disable=unsubscriptable-object
    if np.ndim(data) > 1:
        if self._num_qubits is None:
            self._num_qubits = np.ones(len(data[0])) * 3
    else:
        if self._num_qubits is None:
            self._num_qubits = np.array([3])
    self._data, self._data_grid, self._grid_elements, self._prob_data = \
        discretize_and_truncate(self._data, self._bounds, self._num_qubits,
                                return_data_grid_elements=True,
                                return_prob=True, prob_non_zero=True)
    self._batch_size = batch_size
    self._num_epochs = num_epochs
    self._snapshot_dir = snapshot_dir
    self._g_loss = []
    self._d_loss = []
    self._rel_entr = []
    self._tol_rel_ent = tol_rel_ent
    self._random_seed = seed
    if generator is None:
        self.set_generator()
    else:
        self._generator = generator
    if discriminator is None:
        self.set_discriminator()
    else:
        self._discriminator = discriminator
    self.seed = self._random_seed
    self._ret = {}
def __init__(self, data: Union[np.ndarray, List],
             bounds: Optional[Union[np.ndarray, List]] = None,
             num_qubits: Optional[Union[np.ndarray, List]] = None,
             batch_size: int = 500, num_epochs: int = 3000, seed: int = 7,
             discriminator: Optional[DiscriminativeNetwork] = None,
             generator: Optional[GenerativeNetwork] = None,
             tol_rel_ent: Optional[float] = None,
             snapshot_dir: Optional[str] = None,
             quantum_instance: Optional[
                 Union[QuantumInstance, BaseBackend, Backend]] = None) -> None:
    """
    Args:
        data: Training data of dimension k
        bounds: k min/max data values [[min_0,max_0],...,[min_k-1,max_k-1]];
            if univariate data: [min_0,max_0]
        num_qubits: k numbers of qubits that determine the representation
            resolution, i.e. n qubits enable the representation of 2**n values
            [num_qubits_0,..., num_qubits_k-1]
        batch_size: Batch size, has a min. value of 1.
        num_epochs: Number of training epochs
        seed: Random number seed
        discriminator: Discriminates between real and fake data samples
        generator: Generates 'fake' data samples
        tol_rel_ent: Tolerance level for relative entropy. If the training
            achieves relative entropy equal to or lower than this tolerance,
            it finishes.
        snapshot_dir: Directory into which to store a csv file with parameters;
            if None (default) then no csv file is created.
        quantum_instance: Quantum Instance or Backend
    Raises:
        AquaError: invalid input
    """
    warn_package('aqua.algorithms.distribution_learners',
                 'qiskit_machine_learning.algorithms.distribution_learners',
                 'qiskit-machine-learning')
    validate_min('batch_size', batch_size, 1)
    super().__init__(quantum_instance)
    if data is None:
        raise AquaError('Training data not given.')
    self._data = np.array(data)
    if bounds is None:
        bounds_min = np.percentile(self._data, 5, axis=0)
        bounds_max = np.percentile(self._data, 95, axis=0)
        bounds = []  # type: ignore
        for i, _ in enumerate(bounds_min):
            bounds.append([bounds_min[i], bounds_max[i]])  # type: ignore
    if np.ndim(data) > 1:
        if len(bounds) != (len(num_qubits) or len(data[0])):
            raise AquaError('Dimensions of the data, the length of the data bounds '
                            'and the numbers of qubits per '
                            'dimension are incompatible.')
    else:
        if (np.ndim(bounds) or len(num_qubits)) != 1:
            raise AquaError('Dimensions of the data, the length of the data bounds '
                            'and the numbers of qubits per '
                            'dimension are incompatible.')
    self._bounds = np.array(bounds)
    self._num_qubits = num_qubits
    # pylint: disable=unsubscriptable-object
    if np.ndim(data) > 1:
        if self._num_qubits is None:
            self._num_qubits = np.ones(len(data[0])) * 3  # type: ignore
    else:
        if self._num_qubits is None:
            self._num_qubits = np.array([3])
    self._data, self._data_grid, self._grid_elements, self._prob_data = \
        discretize_and_truncate(self._data, self._bounds, self._num_qubits,
                                return_data_grid_elements=True,
                                return_prob=True, prob_non_zero=True)
    self._batch_size = batch_size
    self._num_epochs = num_epochs
    self._snapshot_dir = snapshot_dir
    self._g_loss = []  # type: List[float]
    self._d_loss = []  # type: List[float]
    self._rel_entr = []  # type: List[float]
    self._tol_rel_ent = tol_rel_ent
    self._random_seed = seed
    if generator is None:
        self.set_generator()
    else:
        self._generator = generator
    if discriminator is None:
        self.set_discriminator()
    else:
        self._discriminator = discriminator
    self.seed = self._random_seed
    self._ret = {}  # type: Dict[str, Any]
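# A hedged construction sketch for the __init__ above (not from the original
# source; assumes the qiskit.aqua QGAN entry point with this signature and a
# local Aer simulator, with `real_data`, `bounds`, `num_qubits` and
# `batch_size` defined as in the driver script below):
#
#     from qiskit import Aer
#     from qiskit.aqua import QuantumInstance
#     from qiskit.aqua.algorithms import QGAN
#
#     q_instance = QuantumInstance(Aer.get_backend('statevector_simulator'))
#     qgan = QGAN(real_data, bounds, num_qubits, batch_size=batch_size,
#                 num_epochs=10, seed=7, quantum_instance=q_instance)
#
# When `bounds` is omitted, it defaults to the 5th/95th percentiles of the
# data; when `num_qubits` is omitted, 3 qubits per dimension are used.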
def _update(self, data: np.ndarray):
    print('Updating data...')
    if self._freq_storage:
        print('Old grid elements: ', self._grid_elements)
        print('Old data probabilities: ', self._prob_data)
        print('Old data count: ', self._prob_data_length)
        print('New data count: ', len(data))
        print('New data: ', data)
        new_data_length = len(data)
        _, _, new_grid_elements, new_prob_data = \
            discretize_and_truncate(data, self._bounds, self._num_qubits,
                                    return_data_grid_elements=True,
                                    return_prob=True, prob_non_zero=True)
        # Common merged grid elements
        temp_grid_elements = np.unique(
            np.concatenate((self._grid_elements, new_grid_elements), 0))
        temp_prob_data = np.zeros(len(temp_grid_elements))
        for j, sample in enumerate(temp_grid_elements):
            for i, element in enumerate(self._grid_elements):
                if sample == element:
                    temp_prob_data[j] += self._prob_data[i] * self._prob_data_length
                    break
            for i, element in enumerate(new_grid_elements):
                if sample == element:
                    temp_prob_data[j] += new_prob_data[i] * new_data_length
                    break
            # Normalize data
            temp_prob_data[j] /= (self._prob_data_length + new_data_length)
        self._prob_data_length += new_data_length
        self._grid_elements = temp_grid_elements
        self._prob_data = temp_prob_data
        print('Processed data count: ', self._prob_data_length)
        print('Processed grid elements: ', self._grid_elements)
        print('Processed data probabilities: ', self._prob_data)
        if self._prob_data_real is not None:
            print('Unknown real data probabilities: ', self._prob_data_real)
        print('')
    else:
        print('Old data count: ', len(self._data))
        print('New data count: ', len(data))
        print('New data: ', data)
        if self._max_data_length is None:
            self._data = np.append(self._data, np.array(data))
        else:
            elements_left = self._max_data_length - len(data)
            if elements_left > 0:
                self._data = np.append(self._data[-elements_left:], np.array(data))
            else:
                self._data = np.array(data[-self._max_data_length:])
        self._data, _, self._grid_elements, self._prob_data = \
            discretize_and_truncate(self._data, self._bounds, self._num_qubits,
                                    return_data_grid_elements=True,
                                    return_prob=True, prob_non_zero=True)
        print('Processed data count: ', len(self._data))
        print('Processed grid elements: ', self._grid_elements)
        print('Processed data probabilities: ', self._prob_data)
        if self._prob_data_real is not None:
            print('Unknown real data probabilities: ', self._prob_data_real)
        print('')
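# A hedged sketch of the concept-drift loop this method enables (illustrative,
# not from the original source; `qgan` and `num_updates` are assumed names):
#
#     qgan.train()                                    # fit the initial dataset
#     for _ in range(num_updates):
#         qgan._update(take_from_dist_2(batch_size))  # merge a drifted batch
#         qgan.train()                                # retrain on merged data
#
# With `_freq_storage` set, only a frequency histogram is kept, and old and new
# probabilities are merged weighted by their sample counts; otherwise the raw
# data stack is extended and, if `_max_data_length` is set, capped to the most
# recent samples.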
# Unknown target, used just for tests
unknown_target_data = take_from_dist_1(1000000)
_, _, _, unknown_target_data_prob = discretize_and_truncate(
    unknown_target_data, bounds, num_qubits,
    return_data_grid_elements=True, return_prob=True, prob_non_zero=True
)

# Predefined unknown target data probabilities
# unknown_target_data_prob = [0.01935999999999938, 0.28349000000014274,
#                             0.45547000000031473, 0.24168000000010093]

# Number of initial data samples
N = 1 * batch_size

# Maximum data length for concept drift
max_data_length = None

# Use frequency histogram as a storage instead of data stack
freq_storage = False

# Initial dataset
real_data = take_from_dist_1(N)

print('Data bounds:', bounds)
print('Batch size:', batch_size)