def _save_num_test():
    mat2ops = [matrix_to_op.one_particle, matrix_to_op.multi_particle]
    metadata = {
        'description': '"nbr_terms" is a list of numpy arrays. Each array '
                       'contains the number of terms in a typical '
                       'Hamiltonian for different matrix-sizes. '
                       'See "mat_to_op" in metadata for the '
                       'matrix_to_operator and '
                       '"mat_size" in data for matrix-sizes. The '
                       'result is an average of 100 matrices.',
        'mat_to_op': [mat2op.__name__ for mat2op in mat2ops]
    }
    file = 'mat_to_op and ansatz/num_terms'
    nbr_terms = []
    mat_size = []
    for mat2op in mat2ops:
        tmp_matrix_size, tmp_nbr_terms = _num_terms(mat2op)
        mat_size.append(tmp_matrix_size)
        nbr_terms.append(tmp_nbr_terms)
    data_ = {'gates': nbr_terms, 'mat_size': mat_size}
    data.save(file=file, data=data_, metadata=metadata, protocol=1)
def _init_metadata(identifier_generator, directory, script_file, force=False):
    """
    Initialize metadata.

    :param identifier_generator: identifier generator
    :param directory: directory
    :param script_file: name of the script
    :param force: if False, raise RuntimeError if metadata already exists.
    :return:
    """
    if not force and _get_metadata(directory, warn=False) != ([], {}):
        raise RuntimeError('Metadata has already been initialized.')

    start_time = perf_counter()

    # Try to load from data/directory
    metadata, metametadata = _get_metadata(directory, False, data.BASE_DIR)

    # Fix directory and path
    path_metadata = join(directory, 'total', 'metadata')

    if (metadata, metametadata) == ([], {}):
        # Initialize from scratch

        # Save metadata file
        metadata = []
        metametadata = {
            'description': "File that keeps track of what's been done "
                           "previously in this script "
                           f"({script_file}).",
            'created_from': script_file
        }
        data.save(file=path_metadata, data=metadata, metadata=metametadata,
                  extract=True, base_dir=base_dir)

        # Add identifiers
        count = 0
        for identifier in identifier_generator:
            count += 1
            if count % 1e4 == 0:
                print(f'{count} identifiers saved.')
            data.append(path_metadata, [identifier, False], base_dir=base_dir)
    else:
        print(f'Initializing metadata from {join(data.BASE_DIR, directory)}.')
        count = len(metadata)
        data.save(file=path_metadata, data=metadata, metadata=metametadata,
                  extract=True, base_dir=base_dir)

    stop_time = perf_counter()
    print(f'\nMetadata initialization completed in'
          f'{stop_time - start_time: .1f} s with {count} identifiers.\n')
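# Usage sketch (not part of the original module): shows how _init_metadata
# might be driven from a script. The identifier generator is assumed to yield
# hashable tuples that uniquely describe one task; the directory and script
# names below are made-up placeholders.
def _example_init_metadata():
    """Hypothetical example only; values are assumptions, not from the source."""
    def identifier_generator():
        for size in (2, 3, 4):          # assumed matrix sizes
            for matidx in range(5):     # assumed matrix indices
                yield (size, matidx)

    _init_metadata(identifier_generator(), directory='example_directory',
                   script_file='example_script.py')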
def NM_save(version, size, ansatz_name, minimizer):
    """
    Average the relative error (%) of Nelder-Mead results per number of
    samples, as a function of the maximum number of measurements, and save
    the result for heatmap plotting.
    """
    base_dir = join(ROOT_DIR, f'data/final_nm/v{version}')
    data_file = f'{ansatz_name}_{minimizer}_size={size}.pkl'
    data_ = data.load(data_file, base_dir)[0]
    fel = []
    samples_lst = []
    max_meas = np.linspace(50000, 3e6, 60)
    nr = []
    identifier_set = set()
    for i, y in enumerate(data_):
        print(f'Done with {i + 1}/{len(data_)}')
        identifier, result = y
        if identifier in identifier_set:
            continue
        identifier_set.add(identifier)
        samples = identifier[6]
        x = result['iteration_params_all']
        fun = result['expectation_vals_all']
        eig = result['correct']
        fun_evals = np.round(max_meas / samples)
        fun_evals[fun_evals == 0] = 1
        fel_none = NM_fel_measmax.fel_measmax(x, fun, identifier, fun_evals)
        error = (eig - fel_none) / eig * 100
        if samples not in samples_lst:
            samples_lst.append(samples)
            print(samples_lst)
            fel.append(error)
            nr.append(1)
        else:
            idx = np.argmin(np.abs(np.array(samples_lst) - samples))
            fel[idx] = fel[idx] + error
            nr[idx] += 1
            print(nr)
    fel = np.array(fel)
    print(nr)
    # Average the accumulated errors over the number of contributing runs.
    for i, rows in enumerate(fel):
        fel[i, :] = rows / nr[i]
    samples_lst = np.array(samples_lst)
    sort_idx = np.argsort(samples_lst)
    samples_lst = samples_lst[sort_idx]
    fel = fel[sort_idx, :]
    file = f'NM_heatmap/v{version}/{ansatz_name}_{minimizer}_size={size}.pkl'
    data2 = max_meas, samples_lst, fel
    data.save(file, data2, extract=True)
def NM_save(version, size, ansatz_name, matidx=None):
    """
    Build a (samples x max_meas) grid of average relative errors (%) from
    Nelder-Mead results and save it for heatmap plotting.
    """
    base_dir = join(ROOT_DIR, f'data/final_nm/v{version}')
    data_file = f'{ansatz_name}_nelder-mead_size={size}.pkl'
    data_ = data.load(data_file, base_dir)[0]
    max_meas_lst = np.linspace(50000, 3e6, 60)
    samples_lst = np.linspace(2750, 256500, 36)
    fel = np.zeros([36, 60])
    nr = np.zeros([36, 60])
    identifier_set = set()
    for y in data_:
        identifier, result = y
        samples = identifier[-2]
        if identifier in identifier_set or samples > 256500:
            continue
        identifier_set.add(identifier)
        i = np.argmin(np.abs(samples_lst - samples))
        for j, max_meas in enumerate(max_meas_lst):
            fun_evals = int(round(max_meas / samples))
            if fun_evals <= 4:
                continue
            funs = np.array(result['expectation_vals_all'][:fun_evals])
            params = np.array(result['iteration_params_all'][:fun_evals])
            idx = np.argmin(funs)
            x = params[idx, :]
            # Average over all evaluations close to the best parameter point.
            idx_lst = np.linalg.norm(params - x, axis=1) <= 1e-3
            fun = np.mean(funs[idx_lst])
            eig = result['correct']
            fel[i, j] += np.abs((fun - eig) / eig * 100)
            nr[i, j] += 1
    for rows in nr:
        print(rows)
    # Average the accumulated errors over the number of contributing runs.
    for i in range(36):
        for j in range(60):
            if nr[i, j] != 0:
                fel[i, j] /= nr[i, j]
    if matidx is None:
        file = f'heatmap_data/v2/nm_{ansatz_name}_size={size}.pkl'
    else:
        file = f'heatmap_data/v2/nm_{ansatz_name}_size={size}_matidx={matidx}.pkl'
    data2 = max_meas_lst, samples_lst, fel
    data.save(file, data2, extract=True)
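# Usage sketch (not part of the original module): how the Nelder-Mead heatmap
# files might be generated for a set of ansätze and matrix sizes. The ansatz
# names, sizes and version number below are placeholders; the real values are
# determined by the pickled files in data/final_nm/.
def _example_nm_heatmaps():
    """Hypothetical example only; parameter values are assumptions."""
    for ansatz_name in ('one_particle_ucc', 'multi_particle'):  # assumed names
        for size in (3, 4, 5):                                  # assumed sizes
            NM_save(version=2, size=size, ansatz_name=ansatz_name)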
def bayes_save(version, size, ansatz_name, minimizer):
    """
    Bin Bayesian optimization results by (samples, max_meas), average the
    relative error (%), and save the result for heatmap plotting.
    """
    base_dir = join(ROOT_DIR, f'data/final_bayes/v{version}')
    data_file = f'{ansatz_name}_{minimizer}_size={size}.pkl'
    data_ = data.load(data_file, base_dir)[0]
    fel = np.zeros([36, 60])
    samples_lst = []
    max_meas_lst = []
    nr = np.zeros([36, 60])
    for i, y in enumerate(data_):
        identifier, result = y
        samples = identifier[6]
        max_meas = identifier[5]
        arr = np.abs(np.linspace(2750, 256500, 36) - samples)
        samples_idx = np.argmin(arr)
        max_meas_idx = np.argmin(
            np.abs(np.linspace(50000, 3e6, 60) - max_meas))
        eig = result['correct']
        fun_none = result['fun_none']
        error = (eig - fun_none) / eig * 100
        max_meas_lst.append(max_meas)
        samples_lst.append(samples)
        fel[samples_idx][max_meas_idx] += error
        nr[samples_idx][max_meas_idx] += 1
    for j in range(36):
        for k in range(60):
            if nr[j, k] != 0:
                fel[j, k] /= nr[j, k]
            else:
                # Empty bins are set to a fixed value of 5.
                fel[j, k] = 5
    print(fel.shape[0])
    file = f'NM_heatmap/v{version}/{ansatz_name}_{minimizer}_size={size}.pkl'
    data2 = np.linspace(50000, 3e6, 60), np.linspace(2750, 256500, 36), fel
    data.save(file, data2, extract=True)
def compare_and_save():
    ansatzs = [
        ansatz.one_particle,
        ansatz.one_particle_ucc,
        ansatz.multi_particle_stereographic,
        ansatz.multi_particle_ucc
    ]
    # Restrict to a single ansatz (the full list above is kept for reference).
    ansatzs = [ansatz.multi_particle_stereographic]
    m = 100
    metadata = {
        'description': '"gates" is a list of numpy arrays. Each array '
                       'contains the number of gates in a typical '
                       'ansatz-program for different matrix-sizes. '
                       'See "ansatz" in metadata for the ansätze and '
                       '"mat_size" in data for matrix-sizes. The '
                       f'result is an average of {m} random vectors.',
        'ansatz': [ansatz_.__name__ for ansatz_ in ansatzs]
    }
    file = 'mat_to_op and ansatz/ansatz_depth'
    n_max = 100
    max_ops = 10000
    gates = []
    mat_size = []
    data_ = {'gates': gates, 'mat_size': mat_size}
    for ansatz_ in ansatzs:
        nbr_ops = _test_depth(ansatz_, n_max=n_max, max_ops=max_ops, m=m)
        gates.append(nbr_ops)
        mat_size.append(
            np.linspace(2, 1 + nbr_ops.size, nbr_ops.size, dtype=np.uint16))
    data.save(file=file, data=data_, metadata=metadata)
def bayes_save(version, size, ansatz_name, matidx=None):
    """
    Merge the two Bayesian result files, bin by (samples, max_meas), average
    the relative error (%), and save the result for heatmap plotting.
    """
    base_dir = join(ROOT_DIR, f'data/final_bayes/v{version}')
    data_file1 = f'{ansatz_name}_bayes_size={size}_part_1.pkl'
    data_file2 = f'{ansatz_name}_bayes_size={size}_part_2.pkl'
    data1 = data.load(data_file1, base_dir)[0]
    data2 = data.load(data_file2, base_dir)[0]
    data_ = data1 + data2
    fel = np.zeros([36, 60])
    nr = np.zeros([36, 60])
    samples_lst = np.zeros([36])
    max_meas_lst = np.zeros([60])
    fun_evals_lst = np.zeros([36, 60])
    identifier_set = set()
    for i, y in enumerate(data_):
        identifier, result = y
        samples = identifier[-1]
        max_meas = identifier[-2]
        fun_evals = int(round(max_meas / samples))
        # Skip duplicates, too many samples, and (if matidx is given)
        # results belonging to other matrices.
        if (identifier in identifier_set or samples > 256500
                or (matidx is not None and matidx != identifier[4])):
            continue
        identifier_set.add(identifier)
        samples_idx = np.argmin(
            np.abs(np.linspace(2750, 256500, 36) - samples))
        max_meas_idx = np.argmin(
            np.abs(np.linspace(50000, 3e6, 60) - max_meas))
        eig = result['correct']
        fun = result['fun']
        error = abs((eig - fun) / eig * 100)
        max_meas_lst[max_meas_idx] = max_meas
        samples_lst[samples_idx] = samples
        fun_evals_lst[samples_idx, max_meas_idx] = fun_evals
        if 4 < fun_evals <= 300:
            fel[samples_idx, max_meas_idx] += error
            nr[samples_idx, max_meas_idx] += 1
    # Average the accumulated errors over the number of contributing runs.
    for i in range(36):
        for j in range(60):
            if nr[i, j] != 0:
                fel[i, j] /= nr[i, j]
    # Average cells that correspond to the same number of function evaluations.
    for i in range(36):
        idx_dict = {}
        for idx, fun_evals in enumerate(fun_evals_lst[i, :]):
            if fun_evals not in idx_dict:
                idx_dict[fun_evals] = []
            idx_dict[fun_evals].append(idx)
        for fun_evals, idx_lst in idx_dict.items():
            fel[i, idx_lst] = np.mean(fel[i, idx_lst])
    for row in nr:
        print(row)
    if matidx is None:
        file = f'heatmap_data/v2/bayes_{ansatz_name}_size={size}.pkl'
    else:
        file = f'heatmap_data/v2/bayes_{ansatz_name}_size={size}_matidx={matidx}.pkl'
    data2 = np.linspace(50000, 3e6, 60), np.linspace(2750, 256500, 36), fel
    data.save(file, data2, extract=True)
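# Usage sketch (not part of the original module): parameter values are
# placeholders. Passing matidx restricts the averaging to a single matrix
# index (identifier[4]); matidx=None averages over all matrices.
def _example_bayes_heatmaps():
    """Hypothetical example only; parameter values are assumptions."""
    bayes_save(version=2, size=4, ansatz_name='multi_particle')            # all matrices
    bayes_save(version=2, size=4, ansatz_name='multi_particle', matidx=0)  # one matrix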
path_metadata = join(directory, file + '_metadata.pkl')

# Load/initialize metadata
try:
    # Try to load the file (will raise FileNotFoundError if not existing)
    metadata, metametadata = data.load(path_metadata)
except FileNotFoundError:
    metadata = []
    metametadata = {
        'description': "File that keeps track of what's been done "
                       "previously in this script "
                       f"({basename(__file__)})."
    }
    data.save(file=path_metadata, data=metadata, metadata=metametadata,
              extract=True)

# Extract identifiers of previously completed simulations
ids = set()
for id_, value in metadata:
    if value is True:
        if id_ in ids:
            raise KeyError('Multiple entries for same id in metadata.')
        ids.add(id_)

# Cleanup (metadata could contain hundreds of Exception objects)
del metadata, metametadata

# TODO: identifiers to iterate over, should yield a tuple that uniquely
def _cleanup_big(identifier_generator, directory, script_file):
    """
    Clean up directory by going through all subdirectories, collecting
    results and fixing metadata.

    :param identifier_generator: generator yielding all task identifiers
    :param directory: directory to clean up
    :param script_file: name of the script
    :return:
    """
    # TODO: collect metadata regarding time, success rate, etc. from
    #  mac-subdirs to total/metadata, save as dict with mac as key(?)
    start_time = perf_counter()

    # Get total/metadata
    metadata, metametadata = _get_metadata(directory, warn=False)
    if (metadata, metametadata) == ([], {}):
        _init_metadata(identifier_generator, directory, script_file)
        # Try again
        metadata, metametadata = _get_metadata(directory, warn=False)
        if (metadata, metametadata) == ([], {}):
            raise ValueError("_init_metadata doesn't work")

    # Convert metadata to dict
    meta_dict = {}
    for x in metadata:
        # Keep only the last exception for a given identifier.
        if x[0] not in meta_dict or meta_dict[x[0]] is not True:
            meta_dict[x[0]] = x[1]
    del metadata

    # Find mac-subdirectories
    subdirs = set()
    with os.scandir(join(base_dir, directory)) as it:
        for entry in it:
            if entry.is_dir() and entry.name.isdigit():
                subdirs.add(join(base_dir, directory, entry.name))

    # Add data/directory to subdirs to not overwrite data from other
    # file-systems
    data_dir = join(data.BASE_DIR, directory)
    if isdir(data_dir):
        subdirs.add(data_dir)

    # Find data-files and keep track of which exists in which subdir
    files = {}
    for subdir in subdirs:
        with os.scandir(subdir) as it:
            for entry in it:
                if entry.is_file() and entry.name != 'metadata.pkl':
                    if entry.name not in files:
                        files[entry.name] = []
                    files[entry.name].append(subdir)

    # Go through files, create file in total (if not existing), add data
    # from files in subdirs and update meta_dict
    count = 0
    metadata = None  # To not crash at del metadata if no file.
    for file in files:
        print(f"\nSaving results in {join('total', file)}."
              "\nUsing data from the following directories:")

        # Load file from total
        try:
            content, metadata = data.load(file=join(directory, 'total', file),
                                          base_dir=base_dir)
        except FileNotFoundError:
            content, metadata = [], {}

        # Convert content to dict
        content_dict = _add_result_to_dict(content, {})
        del content

        # Add content from other files
        for subdir in files[file]:
            print(subdir)
            content_new, metadata_new = data.load(file=join(subdir, file),
                                                  base_dir='')
            content_dict = _add_result_to_dict(content_new, content_dict)

            # Change metadata if no previous
            if metadata == {}:
                metadata = metadata_new
                metadata['created_by'] = data.get_name()
                metadata['created_datetime'] = datetime.now().\
                    strftime("%Y-%m-%d, %H:%M:%S")

        # Convert content_dict back to list and update meta_dict
        content = []
        for id_ in content_dict:
            if id_ not in meta_dict or meta_dict[id_] is not True:
                meta_dict[id_] = True
            for result in content_dict[id_]:
                count += 1
                content.append([id_, result])

        # Save file
        data.save(file=join(directory, 'total', file), data=content,
                  metadata=metadata, extract=True, base_dir=base_dir,
                  disp=False)

    del metadata
    print('\nFinishing cleanup.')

    # Convert meta_dict back to list and save
    metadata = []
    for id_ in meta_dict:
        metadata.append([id_, meta_dict[id_]])
    data.save(file=join(directory, 'total', 'metadata'), base_dir=base_dir,
              data=metadata, metadata=metametadata, extract=True, disp=False)

    # Update metadata in subdirs
    for subdir in subdirs:
        try:
            metametadata_new = data.load(file=join(subdir, 'metadata'),
                                         base_dir='')[1]
        except FileNotFoundError:
            metametadata_new = metametadata
        data.save(file=join(subdir, 'metadata'), base_dir='', data=metadata,
                  metadata=metametadata_new, extract=True, disp=False)

    # Copy content of base_dir/directory/total to data.BASE_DIR/directory
    destination = join(data.BASE_DIR, directory)
    os.makedirs(destination, exist_ok=True)
    with os.scandir(join(base_dir, directory, 'total')) as it:
        for entry in it:
            if entry.is_file():
                shutil.copy(entry.path, destination)

    remaining = sum(x is not True for x in meta_dict.values())

    # Print some stats
    stop_time = perf_counter()
    print(f'\nCleanup completed in {stop_time - start_time:.1f} s.')
    print(f'A total of {len(metadata)} identifiers were handled, {count} '
          f'results saved and {remaining} tasks remaining.\n')
def _run_internal(simulate, identifier_generator, input_functions, directory,
                  script_file, file_from_id, metadata_from_id, num_workers,
                  start_range, stop_range, max_task, chunksize, debug):
    """
    Internal parallel run.

    :return: number of remaining tasks.
    """
    # Files and paths
    directory_nomac = directory
    directory = join(directory, str(mac))
    path_metadata = join(directory, 'metadata')

    try:
        # Try to load the file (will raise FileNotFoundError if not existing)
        metadata, metametadata = data.load(path_metadata, base_dir=base_dir)
    except FileNotFoundError:
        metadata = _get_metadata(directory_nomac)[0]
        metametadata = {
            'description': "File that keeps track of what's been done "
                           "previously in this script "
                           f"({script_file}).",
            'run_time': [],
            'num_workers': [],
            'num_tasks_completed': [],
            'success_rate': [],
            'created_from': script_file
        }
        data.save(file=path_metadata, data=metadata, metadata=metametadata,
                  extract=True, base_dir=base_dir)

    # Extract identifiers of previously completed simulations
    ids = set()
    for id_, value in metadata:
        if value is True:
            if id_ in ids:
                raise KeyError('Multiple entries for same id in metadata.')
            ids.add(id_)

    # Cleanup (metadata can contain thousands of Exception objects)
    del metadata, metametadata

    # Wrap simulate to get expected input/output and handle exceptions
    wrap = Wrap(simulate, debug=debug)

    # Generator for pool
    generator = Bookkeeper(identifier_generator, ids, input_functions,
                           [start_range, stop_range])

    # Counters and such
    files = set()
    success = 0
    fail = 0
    start_time = perf_counter()

    # Actual run
    try:
        with Pool(num_workers, maxtasksperchild=max_task) as p:
            result_generator = p.imap_unordered(wrap, generator,
                                                chunksize=chunksize)
            for identifier, result in result_generator:
                # Handle exceptions:
                if isinstance(result, Exception):
                    fail += 1
                    # Save the error
                    data.append(path_metadata, [identifier, result],
                                base_dir=base_dir)
                else:
                    success += 1
                    file = file_from_id(identifier)
                    if file not in files:
                        files.add(file)
                        if not isfile(join(base_dir, directory,
                                           file + '.pkl')):
                            # Create file
                            metadata = metadata_from_id(identifier)
                            data.save(join(directory, file), [], metadata,
                                      extract=True, base_dir=base_dir)
                    data.append(join(directory, file), [identifier, result],
                                base_dir=base_dir)

                    # Mark the task as completed (last in the else,
                    # after saving result)
                    data.append(path_metadata, [identifier, True],
                                base_dir=base_dir)
    except Exception as e:
        if debug:
            raise e
    finally:
        stop_time = perf_counter()
        total = success + fail
        if total == 0:
            total = 1  # To avoid division by zero

        # Post simulation.
        metadata, metametadata = data.load(path_metadata, base_dir=base_dir)
        metadata = _cleanup_small(metadata)
        metametadata['run_time'].append(stop_time - start_time)
        metametadata['num_workers'].append((num_workers, max_num_workers))
        metametadata['num_tasks_completed'].append(success)
        metametadata['success_rate'].append(success / total)
        data.save(file=path_metadata, data=metadata, metadata=metametadata,
                  extract=True, base_dir=base_dir)

        # Print some stats
        print('\nSimulation completed.')
        print(f'Total number of tasks this far: {len(metadata)}')
        print(f'Completed tasks this run: {success}')
        print(f'Success rate this run: {success / total}')
        remaining = len(metadata) - sum(x[1] for x in metadata
                                        if x[1] is True)
        print(f'Minimum number of tasks remaining for this run: {fail}')
        print(f'Total number of tasks remaining: {remaining}')

    # No success and no fail => no restart
    if not debug:
        return success + fail
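# Usage sketch (not part of the original module): illustrates the shape of the
# file_from_id and metadata_from_id callables passed to _run_internal. The
# identifier layout and file naming below are assumptions for illustration.
def _example_run_internal_callbacks():
    """Hypothetical example only; identifier layout is an assumption."""
    def file_from_id(identifier):
        # Map an identifier to the data file its result is appended to.
        return f'size={identifier[0]}'

    def metadata_from_id(identifier):
        # Metadata stored when the data file is first created.
        return {'description': 'Example results file.',
                'size': identifier[0]}

    return file_from_id, metadata_from_id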