def sweep_setup(config_path):
    """Fill the database with a regular grid of cube-pore materials.

    Loads the config file at ``config_path``, initialises the database named
    by its ``database_connection_string`` and adds one
    ``Material.cube_pore_new`` per (epsilon, sigma, number-of-atoms)
    combination. The sweep values are evenly spaced (``np.linspace``) between
    the limits found under ``structure_parameters``; the atom counts are
    generated with ``dtype=int``. A single commit is issued after every
    material has been added.
    """
    config = load_config_file(config_path)
    db.init_database(config["database_connection_string"])
    session = db.get_session()

    n_atom_points = config['directional_atom_sweep_points']
    n_sigma_points = config['sigma_sweep_points']
    n_epsilon_points = config['epsilon_sweep_points']
    structure_cfg = config['structure_parameters']

    epsilons = np.linspace(*structure_cfg['epsilon_limits'], n_epsilon_points)
    sigmas = np.linspace(*structure_cfg['sigma_limits'], n_sigma_points)
    atom_counts = np.linspace(
        *structure_cfg['directional_atom_limits'], n_atom_points, dtype=int
    )
    atom_diameter = structure_cfg['atom_diameter']

    print("epsilons: ", epsilons)
    print("sigmas: ", sigmas)
    print("num_atoms: ", atom_counts)

    # Lazily build the materials so creation and session.add stay interleaved;
    # epsilon varies slowest, atom count fastest.
    grid = (
        Material.cube_pore_new(sigma, epsilon, atom_count, atom_diameter)
        for epsilon in epsilons
        for sigma in sigmas
        for atom_count in atom_counts
    )
    for material in grid:
        session.add(material)
    session.commit()
def run_materials(config_path, workers=(1, 1)):
    """Run all simulations for this worker's share of the stored materials.

    ``workers`` is a ``(num_workers, worker_num)`` pair. The full list of
    materials is queried from the database and this worker takes every
    ``num_workers``-th entry starting at index ``worker_num - 1``. Each
    material is simulated with ``run_all_simulations`` and then added to the
    session and committed.
    """
    config = load_config_file(config_path)
    db.init_database(config["database_connection_string"])
    session = db.get_session()

    all_materials = session.query(Material).all()
    num_workers, worker_num = workers
    # worker_num is used as a 1-based offset into the strided slice.
    assigned = all_materials[worker_num - 1::num_workers]
    print(len(assigned))

    for material in assigned:
        print("---------------")
        print("%d" % material.id)
        run_all_simulations(material, config)
        session.add(material)
        session.commit()
def output_config_files(config_path, material_ids, database_path=None):
    """Write simulation input files for the given materials.

    For every id in ``material_ids`` and every entry of
    ``config["simulations"]``, a directory named
    ``output_<id>_<first 8 chars of uuid>_<simulation type>_<simulation key>``
    is created (if needed) and the simulation module named by the entry's
    ``type`` is asked to write its files there via ``write_output_files``.
    """
    config = load_config_file(config_path)
    db.init_database(db.get_sqlite_dbcs(database_path))
    session = db.get_session()

    from htsohm.db import Material

    for material_id in material_ids:
        material = session.query(Material).get(material_id)
        for sim_key in config["simulations"]:
            sim_config = config["simulations"][sim_key]
            sim_type = sim_config["type"]
            target_dir = "output_%d_%s_%s_%d" % (
                material.id, material.uuid[0:8], sim_type, sim_key)
            os.makedirs(target_dir, exist_ok=True)
            # The simulation type doubles as the attribute name on `simulate`.
            sim_module = getattr(simulate, sim_config["type"])
            sim_module.write_output_files(material, sim_config, target_dir)
def init_worker(worker_metadata):
    """Initialise a worker process.

    Unpacks ``worker_metadata`` into the module globals ``generator``,
    ``config`` and ``gen``, then initialises the database and stores the
    returned session in the global ``worker_session`` so the worker has its
    own session.
    """
    global config, generator, gen, worker_session

    generator, config, gen = worker_metadata
    connection_string = config["database_connection_string"]
    _engine, worker_session = db.init_database(
        connection_string,
        void_fraction_subtype=config['void_fraction_subtype'],
    )
def sweep_setup(config_path):
    """Fill the database with a regular grid of one-atom materials.

    The number of sweep points per dimension comes from the single
    ``sweep_points`` config key when present, otherwise from the separate
    ``lattice_sweep_points`` / ``sigma_sweep_points`` /
    ``epsilon_sweep_points`` keys. One ``Material.one_atom_new`` is added per
    (epsilon, sigma, lattice constant) combination, always with a == b == c,
    and a single commit is issued at the end.
    """
    config = load_config_file(config_path)
    db.init_database(config["database_connection_string"])
    session = db.get_session()

    if 'sweep_points' not in config:
        n_lattice_points = config['lattice_sweep_points']
        n_sigma_points = config['sigma_sweep_points']
        n_epsilon_points = config['epsilon_sweep_points']
    else:
        n_lattice_points = n_sigma_points = n_epsilon_points = config['sweep_points']

    structure_cfg = config['structure_parameters']
    epsilons = np.linspace(*structure_cfg['epsilon_limits'], n_epsilon_points)
    sigmas = np.linspace(*structure_cfg['sigma_limits'], n_sigma_points)
    lattice_constants = np.linspace(
        *structure_cfg['lattice_constant_limits'], n_lattice_points)

    # Only symmetrical (a, a, a) unit cells are generated. A disabled
    # alternative swept b and c independently over the same limits and
    # removed symmetry-equivalent points:
    # b_d = np.linspace(*scfg['lattice_constant_limits'], config['sweep_points'])
    # c_d = np.linspace(*scfg['lattice_constant_limits'], config['sweep_points'])
    # lattice_coords = np.array(np.meshgrid(a_d, b_d, c_d)).T.reshape(-1,3)
    # lattice_coords = map(sorted, lattice_coords)
    # lattice_coords = set(map(tuple, lattice_coords))
    cells = [(length, length, length) for length in lattice_constants]

    print("epsilons: ", epsilons)
    print("sigmas: ", sigmas)
    print("lattice constants: ", lattice_constants)

    # epsilon varies slowest, the lattice constant fastest.
    grid = (
        Material.one_atom_new(sigma, epsilon, *cell)
        for epsilon in epsilons
        for sigma in sigmas
        for cell in cells
    )
    for material in grid:
        session.add(material)
    session.commit()
def output_material_csv(database_path, ids):
    """Export CSVs for the materials in ``ids``.

    Initialises the database from the connection string that
    ``db.get_sqlite_dbcs(database_path)`` returns and hands a fresh session
    plus ``ids`` to ``output_materials_csvs_from_db``.
    """
    connection_string = db.get_sqlite_dbcs(database_path)
    db.init_database(connection_string)
    output_materials_csvs_from_db(db.get_session(), ids)
def output_atom_sites_csv(database_path):
    """Export the atom-sites CSV.

    Initialises the database from the connection string that
    ``db.get_sqlite_dbcs(database_path)`` returns and hands a fresh session
    to ``output_atom_sites_csv_from_db``.
    """
    connection_string = db.get_sqlite_dbcs(database_path)
    db.init_database(connection_string)
    output_atom_sites_csv_from_db(db.get_session())
def output_csv(database_path, start_id=0):
    """Export the materials CSV.

    Initialises the database from the connection string that
    ``db.get_sqlite_dbcs(database_path)`` returns and hands a fresh session
    plus ``start_id`` to ``output_csv_from_db``.
    """
    connection_string = db.get_sqlite_dbcs(database_path)
    db.init_database(connection_string)
    output_csv_from_db(db.get_session(), start_id)
def bin_graph(config_path, database_path=None, csv_path=None, last_material=None,
              sigma_limits=None, epsilon_limits=None, addl_data_path=None,
              last_children=0):
    """Plot which convergence bins a set of materials has explored.

    Material properties are read either from a CSV file (``csv_path``) or,
    when no CSV is given, from the database at ``database_path``. The plot is
    written to ``binplot_<number of materials>_materials.png`` in the current
    directory via ``delaunay_figure``.

    Args:
        config_path: config file providing ``void_fraction_subtype``,
            ``prop1range``, ``prop2range`` and ``number_of_convergence_bins``.
        database_path: passed to ``db.get_sqlite_dbcs`` when ``csv_path`` is
            not given.
        csv_path: CSV to load instead of the database; columns 12, 13, 5 and 6
            are read (in that order) and the first row is skipped.
        last_material: maximum number of rows / materials to load.
        sigma_limits: optional ``(low, high)`` inclusive filter on the third
            loaded CSV column (CSV mode only).
        epsilon_limits: optional ``(low, high)`` inclusive filter on the
            fourth loaded CSV column (CSV mode only).
        addl_data_path: optional CSV whose columns 1 and 2 are passed to the
            plot as an additional data set.
        last_children: number of trailing materials to treat as the newest
            generation; their bins are reported as new and they are drawn
            together with their parents.

    Raises:
        ValueError: if ``last_children > 0`` is combined with ``csv_path``;
            parent lookup needs ``parent_id`` from database rows.
    """
    if csv_path and last_children > 0:
        # Previously this combination failed late with a NameError because the
        # database rows (`mats_d`) only exist in database mode.
        raise ValueError(
            "last_children > 0 requires loading materials from the database, "
            "not from csv_path")

    config = load_config_file(config_path)
    VoidFraction.set_column_for_void_fraction(config['void_fraction_subtype'])

    prop1range = config['prop1range']
    prop2range = config['prop2range']
    num_bins = config['number_of_convergence_bins']

    # vf_binunits = (prop1range[1] - prop1range[0]) / num_bins
    # ml_binunits = (prop2range[1] - prop2range[0]) / num_bins

    print("loading materials...")
    if csv_path:
        # ndmin=2 keeps a one-row CSV two-dimensional so the row counts and
        # column masks below keep working.
        mats_r = np.loadtxt(csv_path, delimiter=',', skiprows=1,
                            usecols=(12, 13, 5, 6), max_rows=last_material,
                            ndmin=2)
        print("%d rows loaded from csv" % mats_r.shape[0])

        if sigma_limits:
            mats_r = mats_r[(sigma_limits[0] <= mats_r[:, 2]) & (mats_r[:, 2] <= sigma_limits[1])]
            print("%d rows after applying sigma limits" % mats_r.shape[0])

        if epsilon_limits:
            mats_r = mats_r[(epsilon_limits[0] <= mats_r[:, 3]) & (mats_r[:, 3] <= epsilon_limits[1])]
            print("%d rows after applying epsilon limits" % mats_r.shape[0])
    else:
        db.init_database(db.get_sqlite_dbcs(database_path))
        session = db.get_session()

        mats_d = session.query(Material).options(
            joinedload("void_fraction"), joinedload("gas_loading"))
        if last_material:
            mats_d = mats_d.limit(last_material).all()
        else:
            mats_d = mats_d.all()

        print("calculating material properties...")
        mats_r = [(m.void_fraction[0].get_void_fraction(),
                   m.gas_loading[0].absolute_volumetric_loading) for m in mats_d]

    # Everything before this index is "already explored"; the rest is the
    # newest generation.
    last_generation_start = len(mats_r) - last_children

    print("calculating bins...")
    bin_counts = np.zeros((num_bins, num_bins))
    start_bins = calc_bins(mats_r[0:last_generation_start], num_bins,
                           prop1range=prop1range, prop2range=prop2range)
    for bx, by in start_bins:
        bin_counts[bx, by] += 1
    bins_explored = np.count_nonzero(bin_counts)

    new_bins = calc_bins(mats_r[last_generation_start:], num_bins,
                         prop1range=prop1range, prop2range=prop2range)
    print(len(new_bins), len(start_bins), len(set(new_bins) - set(start_bins)))
    new_bins = set(new_bins) - set(start_bins)

    print("bins explored = %d" % bins_explored)

    children = []
    parents = []
    if last_children > 0:
        children = np.array(mats_r[last_generation_start:])
        parent_ids = np.array([m.parent_id for m in mats_d[last_generation_start:]])
        # NOTE(review): indexing by `pid - 1` assumes material ids start at 1
        # and match the query's row order - confirm.
        parents = np.array([mats_r[pid - 1] for pid in parent_ids])

    addl_data = None
    if addl_data_path:
        print("adding additional data from: %s" % addl_data_path)
        addl_data = np.loadtxt(addl_data_path, delimiter=",", skiprows=1, usecols=(1, 2))

    print("outputting graph...")
    output_path = "binplot_%d_materials.png" % len(mats_r)
    delaunay_figure(mats_r, num_bins, output_path, bins=bin_counts, new_bins=new_bins,
                    title="%d Materials: %d/%d %5.2f%%" % (
                        len(mats_r), bins_explored, num_bins ** 2,
                        100*float(bins_explored / num_bins ** 2)),
                    prop1range=prop1range, prop2range=prop2range,
                    show_triangulation=False, show_hull=False,
                    addl_data_set=addl_data, children=children, parents=parents)
def htsohm_run(config_path, restart_generation=-1, override_db_errors=False,
               num_processes=1, max_generations=None):
    """Run the generational material search and export the results as CSV.

    A run either starts fresh (a seeded random initial generation; the
    database must be empty) or restarts from the database
    (``restart_generation >= 0``) or from the restart file named by the
    ``load_restart_path`` config key. Each generation selects parents,
    simulates children, updates the explored-bin bookkeeping and dumps a
    restart file. The loop ends after ``max_generations`` or once the last
    configured benchmark fraction of bins has been explored. Finally
    ``pm.csv`` and ``pm-binned.csv`` are written to the current directory.

    Args:
        config_path: path of the config file to load.
        restart_generation: generation to restart from using the database;
            negative means "do not restart from the database".
        override_db_errors: forwarded as ``delete_excess`` to
            ``check_db_materials_for_restart`` when restarting.
        num_processes: forwarded to ``parallel_simulate_generation``.
        max_generations: last generation to run; defaults to the
            ``max_generations`` config key.

    Raises:
        ValueError: if the ``generator_type`` config key is neither
            ``'random'`` nor ``'mutate'``.
    """

    def _update_bins_counts_materials(all_bins, bins, start_index):
        """Record ``all_bins`` in the counters; return (new bins, all bins)."""
        nonlocal bin_counts, bin_materials
        for i, (bx, by) in enumerate(all_bins):
            bin_counts[bx, by] += 1
            bin_materials[bx][by].append(i + start_index)
        new_bins = set(all_bins) - bins
        return new_bins, bins.union(new_bins)

    config = load_config_file(config_path)
    os.makedirs(config['output_dir'], exist_ok=True)
    print(config)

    children_per_generation = config['children_per_generation']
    prop1range = config['prop1range']
    prop2range = config['prop2range']
    num_bins = config['number_of_convergence_bins']
    benchmarks = config['benchmarks']
    next_benchmark = benchmarks.pop(0)
    last_benchmark_reached = False
    load_restart_path = config['load_restart_path']

    if max_generations is None:
        max_generations = config['max_generations']

    engine, session = db.init_database(
        config["database_connection_string"],
        backup=(load_restart_path != False or restart_generation > 0),
        void_fraction_subtype=config['void_fraction_subtype'])

    print('{:%Y-%m-%d %H:%M:%S}'.format(datetime.now()))
    if restart_generation >= 0:
        print("Restarting from database using generation: %s" % restart_generation)
        box_d, box_r, bin_counts, bin_materials, bins, start_gen = load_restart_db(
            restart_generation, num_bins, prop1range, prop2range, session)

        print("Restarting at generation %d\nThere are currently %d materials" % (start_gen, len(box_r)))
        check_db_materials_for_restart(len(box_r), session, delete_excess=override_db_errors)
    elif load_restart_path:
        print("Restarting from file: %s" % load_restart_path)
        box_d, box_r, bin_counts, bin_materials, bins, start_gen = load_restart(load_restart_path)
        print("Restarting at generation %d\nThere are currently %d materials" % (start_gen, len(box_r)))
        check_db_materials_for_restart(len(box_r), session, delete_excess=override_db_errors)
    else:
        if session.query(Material).count() > 0:
            print("ERROR: cannot have existing materials in the database for a new run")
            sys.exit(1)

        # generate initial generation of random materials
        print("applying random seed to initial points: %d" % config['initial_points_random_seed'])
        random.seed(config['initial_points_random_seed'])
        box_d, box_r = parallel_simulate_generation(
            generator.random.new_material, num_processes, None, config,
            gen=0, children_per_generation=config['children_per_generation'])
        random.seed()  # flush the seed so that only the initial points are set, not generated points

        # setup initial bins
        bin_counts = np.zeros((num_bins, num_bins))
        bin_materials = empty_lists_2d(num_bins, num_bins)
        all_bins = calc_bins(box_r, num_bins, prop1range=prop1range, prop2range=prop2range)
        new_bins, bins = _update_bins_counts_materials(all_bins, set(), 0)

        # output_path = os.path.join(config['output_dir'], "binplot_0.png")
        # delaunay_figure(box_r, num_bins, output_path, bins=bin_counts, \
        #                 title="Starting random materials", show_triangulation=False, show_hull=False, \
        #                 prop1range=prop1range, prop2range=prop2range)

        start_gen = 1

    if config['generator_type'] == 'random':
        generator_method = generator.random.new_material
    elif config['generator_type'] == 'mutate':
        generator_method = generator.mutate.mutate_material
    else:
        # Without this, an unknown type surfaced later as a NameError.
        raise ValueError("unknown generator_type: %r" % (config['generator_type'],))

    for gen in range(start_gen, max_generations + 1):
        benchmark_just_reached = False

        # mutate materials and simulate properties
        parents_d, parents_r = select_parents(children_per_generation, box_d, box_r, bin_materials, config)
        new_box_d, new_box_r = parallel_simulate_generation(
            generator_method, num_processes, parents_d, config,
            gen=gen, children_per_generation=config['children_per_generation'])

        # track bins
        all_bins = calc_bins(new_box_r, num_bins, prop1range=prop1range, prop2range=prop2range)
        new_bins, bins = _update_bins_counts_materials(all_bins, bins, gen * children_per_generation)

        # output space + calced bins
        # The loop variable must not be called `box_r`: that name holds the
        # accumulated results appended to below and used for parent selection.
        for i, child_r in enumerate(new_box_r):
            print("(%s) => (%s)" % (child_r, all_bins[i]))

        # evaluate algorithm effectiveness
        bin_fraction_explored = len(bins) / num_bins ** 2
        print_block('GENERATION %s: %5.2f%%' % (gen, bin_fraction_explored * 100))
        while bin_fraction_explored >= next_benchmark:
            benchmark_just_reached = True
            print_block("%s: %5.2f%% exploration accomplished at generation %d" %
                        ('{:%Y-%m-%d %H:%M:%S}'.format(datetime.now()), bin_fraction_explored * 100, gen))
            if benchmarks:
                next_benchmark = benchmarks.pop(0)
            else:
                last_benchmark_reached = True
                # No further benchmark to advance to; without leaving the
                # loop here the condition would stay true forever.
                break

        # if config['bin_graph_on'] and (
        #     (benchmark_just_reached or gen == config['max_generations']) or \
        #     (config['bin_graph_every'] > 0 and gen % config['bin_graph_every'] == 0)):
        #
        #     output_path = os.path.join(config['output_dir'], "binplot_%d.png" % gen)
        #     delaunay_figure(box_r, num_bins, output_path, children=new_box_r, parents=parents_r,
        #                     bins=bin_counts, new_bins=new_bins,
        #                     title="Generation %d: %d/%d (+%d) %5.2f%% (+%5.2f %%)" %
        #                         (gen, len(bins), num_bins ** 2, len(new_bins),
        #                         100*float(len(bins)) / num_bins ** 2, 100*float(len(new_bins)) / num_bins ** 2 ),
        #                     patches=None, prop1range=prop1range, prop2range=prop2range, \
        #                     perturbation_methods=["all"]*children_per_generation, show_triangulation=False, show_hull=False)
        #
        # if config['tri_graph_on'] and (
        #     (benchmark_just_reached or gen == config['max_generations']) or \
        #     (config['tri_graph_every'] > 0 and gen % config['tri_graph_every'] == 0)):
        #
        #     output_path = os.path.join(config['output_dir'], "triplot_%d.png" % gen)
        #     delaunay_figure(box_r, num_bins, output_path, children=new_box_r, parents=parents_r,
        #                     bins=bin_counts, new_bins=new_bins,
        #                     title="Generation %d: %d/%d (+%d) %5.2f%% (+%5.2f %%)" %
        #                         (gen, len(bins), num_bins ** 2, len(new_bins),
        #                         100*float(len(bins)) / num_bins ** 2, 100*float(len(new_bins)) / num_bins ** 2 ),
        #                     patches=None, prop1range=prop1range, prop2range=prop2range, \
        #                     perturbation_methods=["all"]*children_per_generation)

        box_d = np.append(box_d, new_box_d, axis=0)
        box_r = np.append(box_r, new_box_r, axis=0)

        restart_path = os.path.join(config['output_dir'], "restart.txt.npz")
        dump_restart(restart_path, box_d, box_r, bin_counts, bin_materials, bins, gen + 1)
        if benchmark_just_reached or gen == max_generations:
            # Keep a per-generation copy of the restart file at milestones.
            shutil.move(restart_path, os.path.join(config['output_dir'], "restart%d.txt.npz" % gen))

        if last_benchmark_reached:
            break

    with open("pm.csv", 'w', newline='') as f:
        output_csv_from_db(session, output_file=f)

    with open("pm-binned.csv", 'w', newline='') as f:
        # column 12 is void_fraction_geo, 13 is methane loading
        csv_add_bin_column("pm.csv", [(12, *prop1range, num_bins), (13, *prop2range, num_bins)], output_file=f)
def bin_graph(config_path, csv_path=None, database_path=None):
    """Plot methane loading against lattice constant for a parameter sweep.

    One line is drawn per (sigma, epsilon) pair: the colour encodes sigma and
    the opacity encodes epsilon. A dashed reference curve labelled
    "all sites filled" is drawn as well. The figure is saved as
    ``sig_eps_a_ml.png`` in the current directory.

    Args:
        config_path: config file providing ``number_of_convergence_bins``,
            ``prop2range``, the sweep-point counts (``sweep_points`` or the
            three per-dimension keys) and the limits under
            ``structure_parameters``.
        csv_path: optional CSV to read materials from; columns 4 and 5 are
            taken as sigma and epsilon, columns 1 and 7 as lattice constant
            and loading. The first row is skipped.
        database_path: passed to ``db.get_sqlite_dbcs`` when ``csv_path`` is
            not given.
    """
    config = load_config_file(config_path)

    num_bins = config['number_of_convergence_bins']
    prop1range = config['structure_parameters']['lattice_constant_limits']
    prop2range = config['prop2range']

    if 'sweep_points' in config:
        lattice_sweep_points = sigma_sweep_points = epsilon_sweep_points = config['sweep_points']
    else:
        lattice_sweep_points = config['lattice_sweep_points']
        sigma_sweep_points = config['sigma_sweep_points']
        epsilon_sweep_points = config['epsilon_sweep_points']

    # Never draw more than 11 major x ticks.
    xticks = min(lattice_sweep_points, 11)

    print("loading materials...")
    mats_by_lj = {}
    if csv_path:
        # ndmin=2 keeps a one-row CSV two-dimensional so each `m` is a row.
        csvrows = np.loadtxt(csv_path, delimiter=',', skiprows=1, ndmin=2)
        for m in csvrows:
            lj = (m[4], m[5])  # sigma, epsilon
            # lattice a, abs volumetric loading
            mats_by_lj.setdefault(lj, []).append([m[1], m[7]])
    else:
        db.init_database(db.get_sqlite_dbcs(database_path))
        session = db.get_session()
        mats = session.query(Material) \
            .options(joinedload("structure").joinedload("atom_types")) \
            .options(joinedload("gas_loading")).all()

        print("calculating material properties...")
        for m in mats:
            lj = (m.structure.atom_types[0].sigma, m.structure.atom_types[0].epsilon)
            mats_by_lj.setdefault(lj, []).append(
                [m.structure.a, m.gas_loading[0].absolute_volumetric_loading])

    print("plotting...")

    fig = plt.figure(figsize=(12, 12), tight_layout=True)
    ax = fig.add_subplot(1, 1, 1)

    ax.set_xlim(prop1range[0], prop1range[1])
    ax.set_ylim(prop2range[0], prop2range[1])
    ax.set_xlabel("Lattice constant [Å]")
    ax.set_ylabel("Methane Loading (V [STP]/V)")

    ax.set_yticks(prop2range[1] * np.array([0.0, 0.25, 0.5, 0.75, 1.0]))
    ax.set_yticks(prop2range[1] * np.array(range(0, num_bins + 1)) / num_bins, minor=True)

    ax.set_xticks(prop1range[0] + (prop1range[1] - prop1range[0]) *
                  np.array(range(0, xticks)) / (xticks - 1))
    ax.set_xticks(prop1range[0] + (prop1range[1] - prop1range[0]) *
                  np.array(range(0, num_bins + 1)) / num_bins, minor=True)

    # if show_grid:
    ax.grid(linestyle='-', color='0.8', zorder=0)

    # Reference curve: (1 / a**3) scaled by the constant below.
    absolute_limits_a = np.linspace(prop1range[0], prop1range[1], 100)
    ml_atoms_a3_stp = 2.69e-5
    absolute_limits_ml = [(1 / a**3) / ml_atoms_a3_stp for a in absolute_limits_a]
    ax.plot(absolute_limits_a, absolute_limits_ml, lw=3, linestyle="--",
            color="black", zorder=15, label="all sites filled")

    tab10 = plt.get_cmap("tab10").colors
    for (sig, eps), a_ml in mats_by_lj.items():
        a_ml = np.array(a_ml)
        sig_index = limit_index(
            sig, config['structure_parameters']['sigma_limits'], sigma_sweep_points)
        eps_index = limit_index(
            eps, config['structure_parameters']['epsilon_limits'], epsilon_sweep_points)
        # print(sig, eps, eps_index)

        # Wrap around the palette so sweeps with more sigma points than
        # palette entries reuse colours instead of raising IndexError.
        color = tab10[sig_index % len(tab10)]
        alpha = (eps_index + 1) / epsilon_sweep_points
        # Only the highest-epsilon (fully opaque) line of each sigma gets a
        # legend entry.
        if eps_index + 1 == epsilon_sweep_points:
            label = "sigma = %4.3f" % sig
        else:
            label = None

        ax.plot(a_ml[:, 0], a_ml[:, 1], lw=3, color=color, zorder=20,
                alpha=alpha, label=label)

    ax.legend()
    ax.set_title(
        "Methane loading vs lattice constant. Lines colored by sigma. \nLine transparency shows epsilon (no transparency is highest epsilon value; highest transparency is lowest epsilon value)"
    )
    fig.savefig("sig_eps_a_ml.png")
    plt.close(fig)
def dof_analysis(config_path, output_directory):
    """Summarise, per generation, how each perturbation type moved materials.

    All materials are read from the database and processed in slices of
    ``children_per_generation``. The first slice only seeds the bin counts.
    For every later generation and perturbation type the following are
    accumulated: number of materials, summed change in void fraction and in
    gas loading relative to the parent (both in bin units), summed distance,
    and number of materials landing in a bin that was empty before the
    generation.

    Outputs written into ``output_directory``:
        * ``data.tsv`` - one row per generation with the five statistics for
          each perturbation type;
        * ``animation.npy`` - per generation, ``[bin_x, bin_y, parent_bin_x,
          parent_bin_y]`` per material (parent bins are ``-1`` for the first
          slice);
        * ``<perturbation type>.npy`` - ``[parent loading in bin units, dml]``
          pairs per perturbation type.

    Args:
        config_path: config file providing the database connection string,
            ``children_per_generation``, ``prop1range``, ``prop2range`` and
            ``number_of_convergence_bins``.
        output_directory: existing directory to write the outputs to.
    """
    config = load_config_file(config_path)
    db.init_database(config["database_connection_string"])
    session = db.get_session()

    children_per_generation = config['children_per_generation']
    prop1range = config['prop1range']
    prop2range = config['prop2range']
    num_bins = config['number_of_convergence_bins']
    bin_counts = np.zeros((num_bins, num_bins))

    # Size of one bin along each property axis; deltas are reported in bins.
    vf_binunits = (prop1range[1] - prop1range[0]) / num_bins
    ml_binunits = (prop2range[1] - prop2range[0]) / num_bins

    materials = session.query(Material)

    perturbation_types = [
        "lattice", "lattice_nodens", "atom_types", "atom_sites", "density", "all"
    ]

    tsv_output_path = os.path.join(output_directory, "data.tsv")
    # The header contains non-ASCII characters, so the encoding is pinned; the
    # context manager guarantees the file is flushed and closed.
    with open(tsv_output_path, 'w', encoding="utf-8") as tsvfile:
        tsv = csv.writer(tsvfile, delimiter="\t", lineterminator="\n")
        tsv.writerow(
            [""] + list(chain.from_iterable([[t] * 5 for t in perturbation_types])))
        tsv.writerow(["gen"] + list(
            chain.from_iterable([["#", "∆vf", "∆ml", "dist", "new_bins"]
                                 for t in perturbation_types])))

        mats_d = materials.all()
        mats_r = [(m.void_fraction[0].void_fraction,
                   m.gas_loading[0].absolute_volumetric_loading) for m in mats_d]

        # The first slice only seeds the bin counts.
        new_mats_r = mats_r[0:children_per_generation]
        new_bins = calc_bins(new_mats_r, num_bins, prop1range=prop1range, prop2range=prop2range)
        for bx, by in new_bins:
            bin_counts[bx, by] += 1

        pts = {t: [] for t in perturbation_types}

        gen = 1
        new_mats_d = mats_d[gen * children_per_generation:(gen + 1) * children_per_generation]
        new_mats_r = mats_r[gen * children_per_generation:(gen + 1) * children_per_generation]
        animation = [[[b[0], b[1], -1, -1] for b in new_bins]]

        while len(new_mats_d) > 0:
            new_bins = calc_bins(new_mats_r, num_bins, prop1range=prop1range, prop2range=prop2range)
            gen_animation = []
            # num_materials, ∆vf, ∆ml, ∆all, new_bins
            gen_stats = {t: [0, 0.0, 0.0, 0.0, 0] for t in perturbation_types}
            for i, m in enumerate(new_mats_d):
                m_stats = gen_stats[m.perturbation]
                m_stats[0] += 1
                dvf = (m.void_fraction[0].void_fraction
                       - m.parent.void_fraction[0].void_fraction) / vf_binunits
                dml = (m.gas_loading[0].absolute_volumetric_loading
                       - m.parent.gas_loading[0].absolute_volumetric_loading) / ml_binunits
                m_stats[1] += dvf
                m_stats[2] += dml
                m_stats[3] += (dvf**2 + dml**2)**0.5
                # bin_counts is updated only after the whole generation, so
                # this tests against the state before this generation.
                if bin_counts[new_bins[i][0], new_bins[i][1]] == 0:
                    m_stats[4] += 1

                # generate information for animation script
                parent_r = (m.parent.void_fraction[0].void_fraction,
                            m.parent.gas_loading[0].absolute_volumetric_loading)
                parent_bin = calc_bins([parent_r], num_bins,
                                       prop1range=prop1range, prop2range=prop2range)[0]
                gen_animation.append(
                    [new_bins[i][0], new_bins[i][1], parent_bin[0], parent_bin[1]])

                # this and dml needed for output of numpy arrays
                pts[m.perturbation].append([
                    m.parent.gas_loading[0].absolute_volumetric_loading / ml_binunits,
                    dml
                ])

            for bx, by in new_bins:
                bin_counts[bx, by] += 1

            row = [gen] + list(
                chain.from_iterable([gen_stats[t] for t in perturbation_types]))
            tsv.writerow(row)

            gen += 1
            new_mats_d = mats_d[gen * children_per_generation:(gen + 1) * children_per_generation]
            new_mats_r = mats_r[gen * children_per_generation:(gen + 1) * children_per_generation]
            animation.append(gen_animation)

    # NOTE(review): if the last generation is shorter than the others this
    # list is ragged; confirm np.save accepts it on the numpy version in use.
    np.save(os.path.join(output_directory, "animation"), animation)
    for k in pts:
        np.save(os.path.join(output_directory, k), pts[k])
def init_worker(config):
    """Initialise a worker process.

    Initialises the database named by ``config["database_connection_string"]``
    and stores the returned session in the global ``worker_session`` so the
    worker has its own session.
    """
    global worker_session

    connection_string = config["database_connection_string"]
    _engine, worker_session = db.init_database(connection_string)