def test_forward():
    # load paddle model
    paddle_model = mv3_small_paddle()
    paddle_model.eval()
    paddle_state_dict = paddle.load("./data/mv3_small_paddle.pdparams")
    paddle_model.set_dict(paddle_state_dict)

    # load torch model
    torch_model = mv3_small_torch()
    torch_model.eval()
    torch_state_dict = torch.load("./data/mobilenet_v3_small-047dcff4.pth")
    torch_model.load_state_dict(torch_state_dict)

    # prepare logger & load data
    reprod_logger = ReprodLogger()
    paddle_dataset, paddle_dataloader = build_paddle_data_pipeline()
    torch_dataset, torch_dataloader = build_torch_data_pipeline()
    for idx, (paddle_batch, torch_batch) in enumerate(
            zip(paddle_dataloader, torch_dataloader)):
        if idx > 0:
            break
        evaluate(paddle_batch[0], paddle_batch[1], paddle_model,
                 accuracy_paddle, 'paddle', reprod_logger)
        evaluate(torch_batch[0], torch_batch[1], torch_model, accuracy_torch,
                 'ref', reprod_logger)
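# A follow-up sketch, not part of the original test: once evaluate() has saved
# the two logged outputs, reprod_log's ReprodDiffHelper can compare them. The
# ./result/forward_*.npy paths below are assumptions, not taken from the code
# above.
def check_forward_diff():
    from reprod_log import ReprodDiffHelper

    helper = ReprodDiffHelper()
    paddle_info = helper.load_info("./result/forward_paddle.npy")  # assumed path
    ref_info = helper.load_info("./result/forward_ref.npy")  # assumed path
    helper.compare_info(paddle_info, ref_info)
    # reports pass/fail per logged tensor against the absolute-diff threshold
    helper.report(diff_method="mean", diff_threshold=1e-6,
                  path="./result/log/forward_diff.log")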
def do_symmetric_surface(test_dir, in_plane_supercell=(1, 1), pert_pos=0.0):
    assert len(in_plane_supercell) == 2

    surf = ase.io.read(test_dir + "/surface.xyz", format="extxyz")
    surf *= list(in_plane_supercell) + [1]
    if pert_pos > 0.0:
        surf.rattle(pert_pos)

    bulk = rescale_to_relaxed_bulk(surf)
    bulk_Zs = bulk.get_atomic_numbers()
    evaluate(bulk)
    bulk_cell = bulk.get_cell()
    bulk_E = bulk.get_potential_energy()

    try:
        model.reset_config()
    except AttributeError:
        pass

    print("got relaxed bulk cell ", bulk_cell)
    print("got rescaled surf cell ", surf.get_cell())

    # relax surface system
    tol = 1.0e-2
    surf = relax_config(surf, relax_pos=True, relax_cell=False, tol=tol,
                        save_traj=True, config_label="surface",
                        from_base_model=True, save_config=True,
                        try_restart=True)
    ase.io.write(os.path.join("..", run_root + "-relaxed.xyz"), surf,
                 format='extxyz')

    # check stoichiometry and number of bulk cell energies to subtract
    surf_Zs = surf.get_atomic_numbers()
    Z0 = bulk_Zs[0]
    n_bulk_cells = float(sum(surf_Zs == Z0)) / float(sum(bulk_Zs == Z0))
    if len(set(bulk_Zs)) == 1:
        n_dmu = None
    else:
        n_dmu = {}
        for Z in set(bulk_Zs):
            # make sure types are JSON compatible
            n_dmu[int(Z)] = float(n_bulk_cells * sum(bulk_Zs == Z) -
                                  sum(surf_Zs == Z))

    # calculate surface energy
    area = np.linalg.norm(np.cross(surf.get_cell()[0, :],
                                   surf.get_cell()[1, :]))
    print("got surface cell potential energy", surf.get_potential_energy())
    print("got bulk potential energy", bulk_E * n_bulk_cells)
    print("got area", area)

    return {"bulk_struct_test": surf.info["bulk_struct_test"],
            "Ef": (surf.get_potential_energy() -
                   bulk_E * n_bulk_cells) / (2.0 * area),
            "dmu": n_dmu,
            'filename': run_root + "-relaxed.xyz"}
def train(event, context):
    """Trains our ML model."""
    utilities.download_directory(uri=f"s3://{DATA_BUCKET}/{DATA_PREFIX}",
                                 dst=DATA_TMP_DST)
    df = utilities.read_csv_directory(DATA_TMP_DST)
    print(f"SHAPE: {df.shape}")

    # split into train/test frames, then split off and preprocess features
    train_df, test_df = utilities.train_test_split(df)
    X_train, y_train = utilities.Xy_split(train_df, target='y')
    X_test, y_test = utilities.Xy_split(test_df, target='y')
    X_train = utilities.preprocessing.preprocess(X_train)
    X_test = utilities.preprocessing.preprocess(X_test)

    # fit, evaluate on the held-out split, and persist the model to S3
    model = utilities.Model()
    model.fit(X_train, y_train)
    y_hat = model.predict(X_test)
    eval_results = utilities.evaluate(y_actual=y_test, y_predict=y_hat)
    utilities.save_model(obj=model, uri=f"s3://{DATA_BUCKET}/{MODEL_PREFIX}")

    return {
        "status": "success",
        "results": eval_results,
    }
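# A minimal local smoke test (hypothetical): Lambda supplies (event, context),
# but this handler ignores both, so placeholders suffice once DATA_BUCKET,
# DATA_PREFIX, MODEL_PREFIX and the utilities module are configured.
if __name__ == "__main__":
    response = train(event={}, context=None)
    print(response["status"], response["results"])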
def alphaBeta(board, currentDepth, maxDepth, emptySpaces, isXsTurn, alpha,
              beta):
    evaluation = evaluate(board)
    if currentDepth == maxDepth or not len(emptySpaces) or abs(
            evaluation) == abs(X_WIN):
        return evaluation, (), 0

    optimalMove = ()
    searches = 0
    optimalScore = -INF if isXsTurn else INF
    for space in emptySpaces:
        nextState, _ = move(board, space[0], space[1], X if isXsTurn else O)
        nextScore, _, subtreeSearches = alphaBeta(nextState, currentDepth + 1,
                                                  maxDepth,
                                                  findEmptySpaces(nextState),
                                                  not isXsTurn, alpha, beta)
        searches += 1 + subtreeSearches
        if isXsTurn and optimalScore < nextScore:
            optimalScore = nextScore
            optimalMove = space
            alpha = max(alpha, optimalScore)
        elif not isXsTurn and optimalScore > nextScore:
            optimalScore = nextScore
            optimalMove = space
            beta = min(beta, optimalScore)
        if beta <= alpha:
            break
    return optimalScore, optimalMove, searches
def minimax(board, currentDepth, maxDepth, emptySpaces, isXsTurn):
    evaluation = evaluate(board)
    if currentDepth == maxDepth or not len(emptySpaces) or abs(
            evaluation) == abs(X_WIN):
        return evaluation, (), 0

    optimalMove = ()
    searches = 0
    optimalScore = -INF if isXsTurn else INF
    for space in emptySpaces:
        nextState, _ = move(board, space[0], space[1], X if isXsTurn else O)
        nextScore, _, subtreeSearches = minimax(nextState, currentDepth + 1,
                                                maxDepth,
                                                findEmptySpaces(nextState),
                                                not isXsTurn)
        searches += 1 + subtreeSearches
        if isXsTurn and optimalScore < nextScore:
            optimalScore = nextScore
            optimalMove = space
        elif not isXsTurn and optimalScore > nextScore:
            optimalScore = nextScore
            optimalMove = space

    # Debug string:
    # print("Depth: {}, Empty Spaces: {}, X's turn: {}, Score: {}, Move: {}, "
    #       "Searches: {}".format(currentDepth, len(emptySpaces), isXsTurn,
    #                             optimalScore, optimalMove, searches))
    return optimalScore, optimalMove, searches
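# A usage sketch (hypothetical entry point) comparing the two searches above;
# it assumes the X/O/INF/X_WIN constants and the board helpers
# (findEmptySpaces, move, evaluate) defined alongside these functions. Both
# searches agree on the optimal score, but alpha-beta visits far fewer nodes
# thanks to pruning.
def compare_searches(board):
    spaces = findEmptySpaces(board)
    # search the full game tree: depth bound = number of remaining moves
    score_mm, move_mm, n_mm = minimax(board, 0, len(spaces), spaces, True)
    score_ab, move_ab, n_ab = alphaBeta(board, 0, len(spaces), spaces, True,
                                        -INF, INF)
    assert score_mm == score_ab
    print("minimax:", move_mm, "in", n_mm, "nodes;",
          "alpha-beta:", move_ab, "in", n_ab, "nodes")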
def next_stump(dataset, stumps):
    # Initialize helper variables.
    cols = dataset.columns.get_values()
    y = dataset.iloc[:, 4].values
    stump = {}
    rim = []

    # ln 1: Special case, average all y(i) values for each x(i).
    # The feature to use as root is chosen based on the max change
    # in variance.
    if len(stumps) == 0:
        # Overwrite stump value with average.
        init_subset = dataset
        init_subset['PR'] = np.average(y)
        stump = stump_from_dataset(init_subset)
    # Else build off of previous stump using gradient boosting.
    else:
        # ln 2(a): Find r(i)(m) for x(i) in data columns.
        for index, data in dataset.iterrows():
            rim.append(data['PE'] - utilities.evaluate(stumps, data))
        # ln 2(b): Fit a regression tree to targets r.
        rim_subset = dataset
        rim_subset['PR'] = rim
        stump = stump_from_dataset(rim_subset)

    stumps.append(stump)
    print(stump)
    print(utilities.mse(dataset, stumps))
    return stumps
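# A minimal sketch of what utilities.evaluate(stumps, row) might compute,
# assuming each stump stores a split attribute, a threshold ('value'), and
# leaf predictions ('left'/'right'), with contributions shrunk by the
# lambda = 0.50 mentioned in the training script below. This illustrates the
# ensemble prediction; it is not the project's actual helper.
def evaluate_stumps(stumps, row, shrinkage=0.5):
    prediction = 0.0
    for stump in stumps:
        # route the row to a leaf and accumulate its shrunken contribution
        branch = 'left' if row[stump['attribute']] <= stump['value'] else 'right'
        prediction += shrinkage * stump[branch]
    return prediction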
def do_one_vacancy(bulk_supercell, bulk_supercell_pe, vac_i,
                   relax_radial=0.0, relax_symm_break=0.0, nn_cutoff=0.0,
                   tol=1.0e-2):
    # do unrelaxed (without perturbations)
    vac = bulk_supercell.copy()
    del vac[vac_i]
    label = "ind_%d_Z_%d" % (vac_i,
                             bulk_supercell.get_atomic_numbers()[vac_i])
    unrelaxed_filename = run_root + "-%s-unrelaxed.xyz" % label
    ase.io.write(os.path.join("..", unrelaxed_filename), vac,
                 format='extxyz')
    evaluate(vac)
    unrelaxed_vac_pe = vac.get_potential_energy()

    # recreate with perturbations for relaxation
    vac = bulk_supercell.copy()
    if relax_radial != 0.0 or relax_symm_break != 0.0:
        nl = NeighborList([nn_cutoff / 2.0] * len(bulk_supercell),
                          self_interaction=False, bothways=True)
        nl.update(bulk_supercell)
        indices, offsets = nl.get_neighbors(vac_i)
        offset_factor = relax_radial
        for i, offset in zip(indices, offsets):
            ri = vac.positions[vac_i] - (vac.positions[i] +
                                         np.dot(offset, vac.get_cell()))
            vac.positions[i] += offset_factor * ri
            offset_factor += relax_symm_break
    # record the vacancy site before deleting the atom, otherwise the index
    # refers to a different atom (or is out of range)
    vac_pos = vac.positions[vac_i]
    del vac[vac_i]

    vac = relax_config(vac, relax_pos=True, relax_cell=False, tol=tol,
                       save_traj=True, config_label=label,
                       from_base_model=True, save_config=True,
                       try_restart=True)
    relaxed_filename = run_root + "-%s-relaxed.xyz" % label
    ase.io.write(os.path.join("..", relaxed_filename), vac, format='extxyz')

    # already has calculator from relax_configs
    vac_pe = vac.get_potential_energy()
    if len(set(bulk_supercell.get_atomic_numbers())) == 1:
        Ebulk = float(len(vac)) / float(len(bulk_supercell)) * bulk_supercell_pe
    else:
        Ebulk = bulk_supercell_pe
    Ef0 = unrelaxed_vac_pe - Ebulk
    Ef = vac_pe - Ebulk
    print("got vacancy", label, "cell energy", vac_pe, "n_atoms", len(vac))
    print("got bulk energy", Ebulk, " (scaled to (N-1)/N if single component)")
    return (label, unrelaxed_filename, Ef0, relaxed_filename, Ef,
            int(bulk_supercell.get_atomic_numbers()[vac_i]), vac_pos)
def do_one_antisite_pair(bulk_supercell, bulk_supercell_pe, i1, i2,
                         tol=1.0e-2):
    assert bulk_supercell.numbers[i1] != bulk_supercell.numbers[i2]

    # do unrelaxed (without perturbations)
    antisite = bulk_supercell.copy()
    Z1, Z2 = antisite.numbers[[i1, i2]]
    antisite.numbers[i1] = Z2
    antisite.numbers[i2] = Z1

    label = "ind_%d_Z_%d_ind_%d_Z_%d" % (i1, Z1, i2, Z2)
    unrelaxed_filename = run_root + "-%s-unrelaxed.xyz" % label
    ase.io.write(os.path.join("..", unrelaxed_filename), antisite,
                 format='extxyz')
    evaluate(antisite)
    unrelaxed_antisite_pe = antisite.get_potential_energy()

    antisite = relax_config(antisite, relax_pos=True, relax_cell=False,
                            tol=tol, save_traj=True, config_label=label,
                            from_base_model=True, save_config=True,
                            try_restart=True)
    relaxed_filename = run_root + "-%s-relaxed.xyz" % label
    ase.io.write(os.path.join("..", relaxed_filename), antisite,
                 format='extxyz')

    # already has calculator from relax_configs
    antisite_pe = antisite.get_potential_energy()
    Ef0 = unrelaxed_antisite_pe - bulk_supercell_pe
    Ef = antisite_pe - bulk_supercell_pe

    print("got antisite", label, "cell energy", antisite_pe, "n_atoms",
          len(antisite))
    print("got bulk energy", bulk_supercell_pe)
    return (label, unrelaxed_filename, Ef0, relaxed_filename, Ef, Z1, Z2)
losses = []
m = 150
for i in range(1, m + 1):
    print("Step %d of %d\n" % (i, m))
    stumps = next_stump(train_dataset, stumps)
    losses.append(utilities.mse(train_dataset, stumps))

    # Save trees to a file.
    with open("trees_%d.txt" % (m), "a+") as f:
        f.write("%s:%f:%f:%f:%f\n" %
                (stumps[-1]['attribute'], stumps[-1]['value'],
                 stumps[-1]['left'], stumps[-1]['right'], losses[-1]))

    # Check stopping condition(s).
    if stumps[-1]['gradient'] == math.inf:
        m = i
        break

utilities.plot_loss(m, losses, train_dataset,
                    'Mean Squared Error; lambda = 0.50', 'cornflowerblue')

test_losses = []
y_test = test_dataset.iloc[:, 4].values.tolist()
for index, data in test_dataset.iterrows():
    test_losses.append((data['PE'] - utilities.evaluate(stumps, data))**2)

utilities.plot_test(test_losses, y_test, 'Non-Library Loss for Test Dataset',
                    'lightseagreen')
utilities.stats(test_dataset, stumps, 'Gradient Boosting Tree Algorithm')
def do_all_interstitials(test_dir, nn_cutoff=0.0, tol=1.0e-2):
    print("doing do_all_interstitials")
    bulk_supercell = ase.io.read(os.path.join(test_dir, "bulk_supercell.xyz"),
                                 format="extxyz")
    print("got bulk_supercell ", len(bulk_supercell))
    bulk = rescale_to_relaxed_bulk(bulk_supercell)
    # relax bulk supercell positions in case it's only approximate (as it must
    # be for different models), but stick to relaxed bulk's lattice constants
    # as set by rescale_to_relaxed_bulk
    bulk_supercell = relax_config(bulk_supercell, relax_pos=True,
                                  relax_cell=False, tol=tol, save_traj=True,
                                  config_label="relaxed_bulk",
                                  from_base_model=True, save_config=True)
    ase.io.write(os.path.join("..", run_root + "-rescaled-bulk.xyz"),
                 bulk_supercell, format='extxyz')

    print("got bulk primitive cell ", bulk.get_cell())
    print("got rescaled bulk_supercell cell ", bulk_supercell.get_cell())

    try:
        # Cartesian 3-vector
        interstitial_pos_l = [
            np.array([float(x) for x in bulk_supercell.info["interstitials"]])
        ]
        if len(interstitial_pos_l[0]) != 3:
            raise ValueError("not a 3-vector")
    except:
        interstitial_pos_type = bulk_supercell.info["interstitials"].split()[0]
        if interstitial_pos_type == "mean":
            neighbor_indices = [
                int(i)
                for i in bulk_supercell.info["interstitials"].split()[1:]
            ]
            if len(neighbor_indices) < 2:
                raise ValueError(
                    "interstitial position type mean, but {} < 2 indices".
                    format(len(neighbor_indices)))
            interstitial_pos_l = [
                np.sum(bulk_supercell.get_positions()[neighbor_indices],
                       axis=0) / float(len(neighbor_indices))
            ]
        elif interstitial_pos_type == "inequivalent":
            if 'arb_supercell' in bulk_supercell.info:
                print("making bulk supercell from",
                      bulk_supercell.info['arb_supercell'].reshape((3, 3)))
                bulk_supersupercell = ase.build.make_supercell(
                    bulk_supercell,
                    bulk_supercell.info['arb_supercell'].reshape((3, 3)))
                print("got supersupercell with ", len(bulk_supersupercell),
                      "atoms, cell\n", bulk_supersupercell.get_cell())
            else:
                # no arbitrary supercell requested; keep the original cell
                bulk_supersupercell = bulk_supercell
            voids = find_voids(bulk_supercell)
            interstitial_pos_l = [(v[1], v[2], v[3]) for v in voids]
            bulk_supersupercell.info.update(bulk_supercell.info)
            bulk_supercell = bulk_supersupercell
        else:
            raise ValueError("Unknown interstitial position type in '" +
                             bulk_supercell.info["interstitials"] + "'")

    evaluate(bulk_supercell)
    bulk_supercell_pe = bulk_supercell.get_potential_energy()

    properties = {
        "bulk_struct_test": bulk_supercell.info["bulk_struct_test"],
        "bulk_E_per_atom": bulk_supercell_pe / len(bulk_supercell),
        "defects": {}
    }

    Z_list = ([bulk_supercell.info["Zs"]] if isinstance(
        bulk_supercell.info["Zs"],
        (int, np.integer)) else bulk_supercell.info["Zs"])
    for interstitial_Z in Z_list:
        try:
            relax_radial = bulk_supercell.info['relax_radial_{}'.format(
                interstitial_Z)]
        except KeyError:
            relax_radial = 0.0
        try:
            relax_symm_break = bulk_supercell.info[
                'relax_symm_break_{}'.format(interstitial_Z)]
        except KeyError:
            relax_symm_break = 0.0

        for interst_i, interst_pos in enumerate(interstitial_pos_l):
            label = f'Z_{interstitial_Z}_pos_{interst_i}'
            (unrelaxed_filename, Ef0, relaxed_filename, Ef,
             interstitial_i) = do_one_interstitial(
                 bulk_supercell, bulk_supercell_pe, interstitial_Z,
                 interst_pos, label, relax_radial, relax_symm_break,
                 nn_cutoff, tol)
            properties["defects"][label] = {
                'Ef0': Ef0,
                'Ef': Ef,
                'unrelaxed_filename': unrelaxed_filename,
                'relaxed_filename': relaxed_filename,
                'atom_ind': int(interstitial_i),
                'Z': int(interstitial_Z),
                'pos_index': interst_i
            }
            if len(set(bulk_supercell.get_atomic_numbers())) != 1:
                properties["defects"][label]['dmu'] = [-1,
                                                       int(interstitial_Z)]

    return properties
def do_farthest_inequiv_pairs(test_dir, tol=1.0e-2):
    print("doing do_farthest_inequiv_pairs")
    bulk_supercell = ase.io.read(os.path.join(test_dir, "bulk_supercell.xyz"),
                                 format="extxyz")
    print("got bulk_supercell ", len(bulk_supercell))
    bulk = rescale_to_relaxed_bulk(bulk_supercell)
    # relax bulk supercell positions in case it's only approximate (as it must
    # be for different models), but stick to relaxed bulk's lattice constants
    # as set by rescale_to_relaxed_bulk
    bulk_supercell = relax_config(bulk_supercell, relax_pos=True,
                                  relax_cell=False, tol=tol, save_traj=True,
                                  config_label="rescaled_bulk",
                                  from_base_model=True, save_config=True)
    ase.io.write(os.path.join("..", run_root + "-rescaled-bulk.xyz"),
                 bulk_supercell, format='extxyz')

    print("got bulk primitive cell ", bulk.get_cell())
    print("got rescaled bulk_supercell cell ", bulk_supercell.get_cell())

    if 'arb_supercell' in bulk_supercell.info:
        print("making bulk supercell from",
              bulk_supercell.info['arb_supercell'].reshape((3, 3)))
        bulk_supersupercell = ase.build.make_supercell(
            bulk_supercell,
            bulk_supercell.info['arb_supercell'].reshape((3, 3)))
        print("got supersupercell with ", len(bulk_supersupercell),
              "atoms, cell\n", bulk_supersupercell.get_cell())
        bulk_supersupercell.info.update(bulk_supercell.info)
        bulk_supercell = bulk_supersupercell

    sym_data = spglib.get_symmetry_dataset(bulk_supercell, symprec=0.01)
    equiv_at = set([
        tuple(iZ) for iZ in zip(sym_data["equivalent_atoms"],
                                bulk_supercell.numbers)
    ])
    # print("equiv_at", equiv_at)

    antisite_list = []
    for i1, Z1 in equiv_at:
        for i2_proto, Z2 in equiv_at:
            if Z1 <= Z2:
                continue
            # print("check i", i1, i2_proto, "Z", Z1, Z2)
            i2s = np.where(sym_data["equivalent_atoms"] == i2_proto)[0]
            i2_dists = bulk_supercell.get_distances(i1, i2s, mic=True)
            farthest_ind = np.argmax(i2_dists)
            i2 = i2s[farthest_ind]
            antisite_list.append((i1, i2))
    # print("antisite_list", antisite_list)

    evaluate(bulk_supercell)
    bulk_supercell_pe = bulk_supercell.get_potential_energy()

    properties = {
        "bulk_struct_test": bulk_supercell.info["bulk_struct_test"],
        "bulk_E_per_atom": bulk_supercell_pe / len(bulk_supercell),
        "defects": {}
    }

    for i1, i2 in antisite_list:
        (label, unrelaxed_filename, Ef0, relaxed_filename, Ef, Z1,
         Z2) = do_one_antisite_pair(bulk_supercell, bulk_supercell_pe, i1,
                                    i2, tol)
        properties["defects"][label] = {
            'Ef0': Ef0,
            'Ef': Ef,
            'unrelaxed_filename': unrelaxed_filename,
            'relaxed_filename': relaxed_filename,
            'atom_inds': (int(i1), int(i2)),
            'Zs': (int(Z1), int(Z2))
        }

    print("returning properties", properties)
    return properties
def test_evaluate(self):
    np.random.seed(123)
    truth = np.random.poisson(2.0, 100)
    prediction = np.ones(100) * 2.0
    plt.clf()
    utilities.evaluate(prediction, truth)
def do_one_interstitial(bulk_supercell, bulk_supercell_pe, interstitial_Z,
                        interstitial_pos, label, relax_radial=0.0,
                        relax_symm_break=0.0, nn_cutoff=0.0, tol=1.0e-2):
    interst = bulk_supercell.copy()
    interst += Atoms(numbers=[interstitial_Z], positions=[interstitial_pos])
    interstitial_i = len(interst) - 1

    if relax_radial != 0.0 or relax_symm_break != 0.0:
        # neighbour list must be built over the cell that includes the added
        # interstitial atom, so that its index is in range
        nl = NeighborList([nn_cutoff / 2.0] * len(interst),
                          self_interaction=False, bothways=True)
        nl.update(interst)
        indices, offsets = nl.get_neighbors(interstitial_i)
        offset_factor = relax_radial
        for i, offset in zip(indices, offsets):
            ri = interst.positions[interstitial_i] - (
                interst.positions[i] + np.dot(offset, interst.get_cell()))
            interst.positions[i] += offset_factor * ri
            offset_factor += relax_symm_break

    if "interstitial_constraint" in bulk_supercell.info:
        (constr_type, constr_subtype) = \
            bulk_supercell.info["interstitial_constraint"].split()[0:2]

        if constr_type == "plane":
            if constr_subtype == "atoms":
                indices = [
                    int(i) for i in
                    bulk_supercell.info["interstitial_constraint"].split()[2:]
                ]
                if len(indices) != 3:
                    raise ValueError(
                        "number of indices not 3 for plane atoms '{}'".format(
                            bulk_supercell.info["interstitial_constraint"]))
                p = interst.get_positions()
                constr_normal = np.cross(p[indices[0]] - p[indices[1]],
                                         p[indices[0]] - p[indices[2]])
            elif constr_subtype == "vector":
                constr_normal = np.array(
                    bulk_supercell.info["interstitial_constraint"].split()[2:],
                    dtype=float)
            else:
                raise ValueError(
                    "unknown interstitial constraint subtype for plane '{}'".
                    format(bulk_supercell.info["interstitial_constraint"]))
            print("setting constraint FixedPlane with normal", constr_normal)
            interst.set_constraint(FixedPlane(interstitial_i, constr_normal))
        else:
            raise ValueError(
                "unknown interstitial constraint type '{}'".format(
                    bulk_supercell.info["interstitial_constraint"]))

    evaluate(interst)
    unrelaxed_interstitial_pe = interst.get_potential_energy()

    if len(set(bulk_supercell.get_atomic_numbers())) == 1:
        Ebulk = float(len(interst)) / float(
            len(bulk_supercell)) * bulk_supercell_pe
    else:
        Ebulk = bulk_supercell_pe
    Ef0 = unrelaxed_interstitial_pe - Ebulk

    unrelaxed_filename = run_root + "-%s-unrelaxed.xyz" % label
    ase.io.write(os.path.join("..", unrelaxed_filename), interst,
                 format='extxyz')
    print("got unrelaxed interstitial {} cell energy".format(label),
          unrelaxed_interstitial_pe)

    try:
        interst = relax_config(interst, relax_pos=True, relax_cell=False,
                               tol=tol, save_traj=True, config_label=label,
                               from_base_model=True, save_config=True,
                               try_restart=True)
        relaxed_filename = run_root + "-%s-relaxed.xyz" % label
        ase.io.write(os.path.join("..", relaxed_filename), interst,
                     format='extxyz')
        interstitial_pe = interst.get_potential_energy()
        Ef = interstitial_pe - Ebulk
        print("got relaxed interstitial {} cell energy".format(label),
              interstitial_pe)
    except:
        relaxed_filename = None
        Ef = None

    print("got bulk energy", Ebulk)
    return (unrelaxed_filename, Ef0, relaxed_filename, Ef, interstitial_i)
def do_interstitial(test_dir, nn_cutoff=0.0, tol=1.0e-2):
    print("doing do_interstitial")
    bulk_supercell = ase.io.read(os.path.join(test_dir, "bulk_supercell.xyz"),
                                 format="extxyz")
    print("got bulk_supercell ", len(bulk_supercell))
    bulk = rescale_to_relaxed_bulk(bulk_supercell)
    # relax bulk supercell positions in case it's only approximate (as it must
    # be for different models), but stick to relaxed bulk's lattice constants
    # as set by rescale_to_relaxed_bulk
    bulk_supercell = relax_config(bulk_supercell, relax_pos=True,
                                  relax_cell=False, tol=tol, traj_file=None,
                                  config_label="relaxed_bulk",
                                  from_base_model=True, save_config=True)
    evaluate(bulk_supercell)
    bulk_supercell_pe = bulk_supercell.get_potential_energy()
    ase.io.write(os.path.join("..", run_root + "-rescaled-bulk.xyz"),
                 bulk_supercell, format='extxyz')

    print("got bulk primitive cell ", bulk.get_cell())
    print("got rescaled bulk_supercell cell ", bulk_supercell.get_cell())

    properties = {
        "bulk_struct_test": bulk_supercell.info["bulk_struct_test"],
        "bulk_E_per_atom": bulk_supercell_pe / len(bulk_supercell),
        "defects": {}
    }

    try:
        # Cartesian 3-vector
        interstitial_pos = np.array([
            float(x) for x in bulk_supercell.info["interstitial_position"]
        ])
        if len(interstitial_pos) != 3:
            raise ValueError("not a 3-vector")
    except:
        interstitial_pos_type = \
            bulk_supercell.info["interstitial_position"].split()[0]
        if interstitial_pos_type == "mean":
            neighbor_indices = [
                int(i) for i in
                bulk_supercell.info["interstitial_position"].split()[1:]
            ]
            if len(neighbor_indices) < 2:
                raise ValueError(
                    "interstitial position type mean, but {} < 2 indices".
                    format(len(neighbor_indices)))
            interstitial_pos = np.sum(
                bulk_supercell.get_positions()[neighbor_indices],
                axis=0) / float(len(neighbor_indices))
        else:
            raise ValueError(
                "Unknown interstitial position type in '" +
                bulk_supercell.info["interstitial_position"] + "'")

    if isinstance(bulk_supercell.info["Zs"], list):
        Z_list = bulk_supercell.info["Zs"]
    else:
        Z_list = [bulk_supercell.info["Zs"]]

    for interstitial_Z in Z_list:
        try:
            relax_radial = bulk_supercell.info['relax_radial_{}'.format(
                interstitial_Z)]
        except KeyError:
            relax_radial = 0.0
        try:
            relax_symm_break = bulk_supercell.info[
                'relax_symm_break_{}'.format(interstitial_Z)]
        except KeyError:
            relax_symm_break = 0.0

        # NB: relies on a do_one_interstitial() variant that generates the
        # label itself and returns it as the first element of the tuple
        (label, unrelaxed_filename, Ef0, relaxed_filename, Ef,
         interstitial_i) = do_one_interstitial(
             bulk_supercell, bulk_supercell_pe, interstitial_Z,
             interstitial_pos, relax_radial, relax_symm_break, nn_cutoff,
             tol)

        properties["defects"][label] = {
            'Ef0': Ef0,
            'Ef': Ef,
            'unrelaxed_filename': unrelaxed_filename,
            'relaxed_filename': relaxed_filename,
            'atom_ind': int(interstitial_i),
            'Z': int(interstitial_Z)
        }
        if len(set(bulk_supercell.get_atomic_numbers())) != 1:
            properties["defects"][label]['dmu'] = [-1, interstitial_Z]

    return properties
def do_lattice(bulk, use_precon=True, elastic=True, tol=1.0e-3):
    print("unrelaxed bulk")
    ase.io.write(sys.stdout, bulk, format='extxyz')

    # use one of the routines from utilities module to relax the initial
    # unit cell and atomic positions
    if use_precon:
        bulk = relax_atoms_cell(bulk, tol=tol, traj_file="bulk.relax.extxyz",
                                symmetrize=True)
    else:
        bulk = relax_atoms_cell(bulk, tol=tol, traj_file=None, method='cg_n',
                                symmetrize=True)

    print("relaxed bulk")
    ase.io.write(sys.stdout, bulk, format='extxyz')

    print("calculating elastic constants")
    precon = None
    if use_precon:
        precon = Exp(3.0)
    opt = lambda atoms, **kwargs: PreconLBFGS(atoms, precon=precon, **kwargs)
    if elastic:
        # reset calculator to non-symmetrized one (not optimal, but would
        # otherwise need to have the optimizer used by fit_elastic_constants
        # reset symmetry for each relaxation)
        bulk.set_calculator(model.calculator)
        try:
            elastic_consts = matscipy.elasticity.fit_elastic_constants(
                bulk, symmetry='tetragonal_high', optimizer=opt,
                logfile=sys.stdout)
        except RuntimeError:
            # fallback on FIRE if we get a linesearch failure with LBFGS
            opt = FIRE
            elastic_consts = matscipy.elasticity.fit_elastic_constants(
                bulk, symmetry='tetragonal_high', optimizer=opt,
                logfile=sys.stdout)
        c11 = elastic_consts[0][0, 0] / GPa
        c33 = elastic_consts[0][2, 2] / GPa
        c12 = elastic_consts[0][0, 1] / GPa
        c13 = elastic_consts[0][0, 2] / GPa
        c44 = elastic_consts[0][3, 3] / GPa
        c66 = elastic_consts[0][5, 5] / GPa

    print("calculating E vs. V")
    V0 = bulk.get_volume()
    dV = bulk.get_volume() * 0.025
    E_vs_V = []

    scaled_bulk = bulk.copy()
    print("bulk going into E vs. V")
    ase.io.write(sys.stdout, scaled_bulk, format='extxyz')
    f = open("relaxed_E_vs_V_configs.xyz", "w")

    scaled_bulk = bulk.copy()
    constraints = []
    # scaled_bulk.arrays["move_mask_3"] = np.zeros((len(scaled_bulk), 3), dtype=np.int)
    # scaled_bulk.arrays["move_mask_3"][:, 0] = 1
    # for i in range(len(scaled_bulk)):
    #     constraints.append(FixedLine_forces_only(i, (0.0, 0.0, 1.0)))
    # scaled_bulk.set_constraint(constraints)
    for i in range(0, -5 - 1, -1):
        print("doing volume step", i)
        vcur = scaled_bulk.get_volume()
        scaled_bulk.set_cell(scaled_bulk.get_cell() *
                             ((V0 + i * dV) / vcur)**(1.0 / 3.0),
                             scale_atoms=True)
        try:
            scaled_bulk = relax_atoms_cell(scaled_bulk, tol=tol,
                                           traj_file=None,
                                           constant_volume=True,
                                           method='cg_n', symmetrize=True,
                                           max_steps=500)
        except:
            break
        print("done relaxing step", i)
        ase.io.write(f, scaled_bulk, format='extxyz')
        f.flush()
        E_vs_V.insert(0, (scaled_bulk.get_volume() / len(bulk),
                          scaled_bulk.get_potential_energy() / len(bulk)))
        evaluate(scaled_bulk)
        print("done evaluate step", i)
        print("EV ", i, scaled_bulk.get_volume(),
              scaled_bulk.get_potential_energy(), scaled_bulk.get_stress())

    scaled_bulk = bulk.copy()
    # scaled_bulk.arrays["move_mask_3"] = np.zeros((len(scaled_bulk), 3), dtype=np.int)
    # scaled_bulk.arrays["move_mask_3"][:, 0] = 1
    # scaled_bulk.set_constraint(constraints)
    for i in range(1, 6 + 1):
        print("doing volume step", i)
        vcur = scaled_bulk.get_volume()
        scaled_bulk.set_cell(scaled_bulk.get_cell() *
                             ((V0 + i * dV) / vcur)**(1.0 / 3.0),
                             scale_atoms=True)
        try:
            scaled_bulk = relax_atoms_cell(scaled_bulk, tol=tol,
                                           traj_file=None,
                                           constant_volume=True,
                                           method='cg_n', symmetrize=True,
                                           max_steps=500)
        except:
            break
        print("done relaxing step", i)
        ase.io.write(f, scaled_bulk, format='extxyz')
        f.flush()
        E_vs_V.append((scaled_bulk.get_volume() / len(bulk),
                       scaled_bulk.get_potential_energy() / len(bulk)))
        evaluate(scaled_bulk)
        print("done evaluate step", i)
        print("EV ", i, scaled_bulk.get_volume(),
              scaled_bulk.get_potential_energy(), scaled_bulk.get_stress())

    for (V, E) in E_vs_V:
        print("EV_final ", V, E)

    if elastic:
        return (c11, c33, c12, c13, c44, c66, E_vs_V)
    else:
        return (E_vs_V)
def main():
    """
    Entry point for list-extractor.
    Parses parameters and calls other modules in order to serialize a RDF graph.

    Takes the following **command-line parameters**:

    * **collect_mode** : ``s`` or ``a``

        Use ``s`` to specify a single resource or ``a`` for a class of resources in the next parameter.

    * **source** : a string representing a class of resources from DBpedia ontology (find supported domains below),
      or a single Wikipedia page of any resource.

    * **language** : ``en``, ``it``, ``de``, ``es`` (for now, available only for selected domains)

        a two-letter prefix corresponding to the desired language of Wikipedia pages and SPARQL endpoint to be queried.

    * **-c --classname** : a string representing classnames you want to associate your resource with.
      Applicable only for ``collect_mode="s"``.
    """
    # initialize argparse parameters
    parser = argparse.ArgumentParser(
        description='Extract data from lists in Wikipedia pages and serialize it in RDF.'
                    '\nExample: `python listExtractor.py a Writer en`',
        formatter_class=argparse.RawTextHelpFormatter,
        usage="listExtractor.py [--help] collection_mode resource language "
              "\nUse listExtractor.py -h for more details.\n ")
    parser.add_argument('collect_mode',
                        help="'s' to specify a single Wikipedia page;"
                             "\n'a' for all resources from a DBpedia class.\n ",
                        choices=['s', 'a', 'l', 'q'])
    parser.add_argument('source', type=str,
                        help='Select resource to extract lists from. Options are:'
                             '\nSingle Wikipedia page (example: William_Gibson)'
                             '\nDBpedia ontology class (example: Writer)\n ')
    parser.add_argument('language', type=str, choices=['en', 'it', 'de', 'es'],
                        default='en',
                        help='Language prefix of Wikipedia pages to analyze.'
                             '\nen: English (Default)\nit: Italian\nde: German\nes: Spanish\n')
    parser.add_argument("-c", "--classname", type=str,
                        help='Provide a classname from settings.json and use its'
                             '\nmapper functions')
    # nargs='?' is required for a positional argument to actually fall back
    # to its default value
    parser.add_argument('pattern', type=int, nargs='?', default=0,
                        help='Pattern that will be applied to extracting')
    args = parser.parse_args()

    # initialize RDF graph which will contain the triples
    g = rdflib.Graph()
    g.bind("dbo", "http://dbpedia.org/ontology/")
    g.bind("dbr", "http://dbpedia.org/resource/")

    # start extracting lists from resources
    if args.collect_mode == 's':  # extract list information from a single resource
        # resource = args.source.encode('utf-8')  # apply utf-8 encoding
        resource = args.source.encode()
        # resource = args.source
        resDict = wikiParser.main_parser(args.language, resource)  # create a dict representing the resource

        for key in resDict:  # print data present in the resDict
            print(key, ":", resDict[key])
            print('')

        ''' Decomment the line below to create a file inside a resources folder containing the dictionary '''
        utilities.createResFile(resDict, args.language, resource)

        # Asks the endpoint for a list of types/classes associated to the resource
        if args.classname is None:
            rdf_type = utilities.get_resource_type(args.language, resource)
            print(rdf_type)
        else:
            rdf_type = [classes.strip() for classes in args.classname.split(',')]
        # print rdf_type

        list_elems = 0  # Used to keep track of the number of list elements extracted
        for t in rdf_type:
            # for each type found, look for a suitable mapping and apply it
            list_elems += mapper.select_mapping(resDict, resource,
                                                args.language, t, g)  # get number of elements extracted
            # print '>>>>>', t, list_elems
        tot_list_elems = utilities.count_listelem_dict(resDict)  # count all list elements of the resource
        print("Total elements extracted: " + str(list_elems) + "/" +
              str(tot_list_elems))

    elif args.collect_mode == 'a':  # extract lists from a class of resources from DBpedia ontology (e.g. 'Writer')
        if utilities.check_existing_class(args.source):
            # The domain has already been mapped (in settings.json)
            try:
                print('Fetching resources, please wait......')
                resources = utilities.get_resources(args.language, args.source)
                res_num = len(resources)  # total number of resources
                curr_num = 1  # current resource to be analyzed
            except:
                print("Could not find specified class of resources: " +
                      args.source)
                sys.exit(0)
        else:
            print('\nThis domain has not been mapped yet!')
            print('You can add a mapping for this domain using rulesGenerator.py and try again...')
            sys.exit(0)

        tot_extracted_elems = 0  # Used to keep track of the number of list elements extracted
        tot_elems = 0  # Used to keep track of the total number of list elements
        total_res_failed = 0
        print('Completed! Found', str(res_num),
              'resources.\nStarting extraction....\n')

        for res in resources:
            try:
                print(res + " (" + str(curr_num) + " of " + str(res_num) + ")")
                resDict = wikiParser.main_parser(args.language, res)  # create a dict representing each resource
                tot_elems += utilities.count_listelem_dict(resDict)
                ''' Decomment the line below to create a file inside a resources folder containing the dictionary '''
                # utilities.createResFile(resDict, args.language, res)
            except:  # handle parsing errors; no dict found or no relevant sections found
                print("Could not parse " + args.language + ":" + res)
                total_res_failed += 1
                curr_num += 1
            else:  # successfully parsed; proceed and form triples
                curr_num += 1
                print(">>> " + args.language + ":" + res +
                      " has been successfully parsed <<<")
                extr_elems = mapper.select_mapping(resDict, res,
                                                   args.language,
                                                   args.source, g)
                mapper.mapped_domains = []  # reset domains already mapped for next resource
                tot_extracted_elems += extr_elems
                print(">>> Mapped " + args.language + ":" + res +
                      ", extracted elements: " + str(extr_elems) + " <<<\n")

        # evaluation metrics for the extraction process; store relevant stats in evaluation.csv
        utilities.evaluate(args.language, args.source, res_num,
                           res_num - total_res_failed, tot_extracted_elems,
                           tot_elems, len(g))

    elif args.collect_mode == 'l':
        resource = args.source.encode()
        resDict = wikiParser.main_parser(args.language, resource)  # create a dict representing the resource
        pattern = args.pattern

        for key in resDict:  # print data present in the resDict
            print(key, ":", resDict[key])
            print('')

        ''' Decomment the line below to create a file inside a resources folder containing the dictionary '''
        utilities.createResFile(resDict, args.language, resource)

        list_elems = 0  # Used to keep track of the number of list elements extracted
        # look for a suitable list mapping and apply it
        list_elems += listMApper.list_select_mapping(resDict, resource,
                                                     args.language, g,
                                                     pattern)  # get number of elements extracted
        # print '>>>>>', t, list_elems
        tot_list_elems = utilities.count_listelem_dict(resDict)  # count all list elements of the resource
        print("Total elements extracted: " + str(list_elems) + "/" +
              str(tot_list_elems))

    elif args.collect_mode == 'q':
        pages = utilities.find_all_element()
        for p in pages:
            print(p)
        print("finished finding")

        for p in pages:
            resource = p.replace(" ", "_").encode()
            resDict = wikiParser.main_parser(args.language, resource)  # create a dict representing the resource
            for key in resDict:  # print data present in the resDict
                print(key, ":", resDict[key])
                print('')

            ''' Decomment the line below to create a file inside a resources folder containing the dictionary '''
            utilities.createResFile(resDict, args.language, resource)

            list_elems = 0  # Used to keep track of the number of list elements extracted
            # look for a suitable list mapping and apply it
            list_elems += listMApper.list_select_mapping(resDict, resource,
                                                         args.language, g)  # get number of elements extracted
            # print '>>>>>', t, list_elems
            tot_list_elems = utilities.count_listelem_dict(resDict)  # count all list elements of the resource
            print("Total elements extracted: " + str(list_elems) + "/" +
                  str(tot_list_elems))

    # If the graph contains at least one statement, create a .ttl file with the RDF triples created
    g_length = len(g)
    if g_length > 0:
        file_name = ("ListExtractor_" + args.source + "_" + args.language +
                     "_" + utilities.getDate() + ".ttl")
        file_path = utilities.get_subdirectory('extracted', file_name)
        g.serialize(file_path, format="turtle")
        print(str(g_length) + " statements created. Triples serialized in: " +
              file_path)
    else:
        print("Could not serialize any RDF statement! :(")
# set of utility routines specific to this model/testing framework
import os

import ase.io

# the current model
import model, utilities

force_component_errors = []

ats = ase.io.read(os.path.join(os.path.dirname(__file__),
                               'testing_database.xyz'),
                  index=':', format='extxyz')
for at in ats:
    if len(at) > 1:
        at.wrap()
    # ase.io.write(sys.stdout, at, format="extxyz")
    # sys.stdout.flush()
    try:
        at = utilities.evaluate(at)
        try:
            dft_f = at.arrays['dft_force']
        except KeyError:
            try:
                dft_f = at.arrays['DFT_force']
            except KeyError:
                pass
        f = at.get_forces()
        for i in range(len(at)):
            try:
                c_t = at.info['config_type']
            except KeyError:
                c_t = 'NONE'
            force_component_errors.append(
                (dft_f[i, 0], f[i, 0] - dft_f[i, 0], c_t))
    except Exception:
        # assumed closing clause (the original snippet breaks off here):
        # skip configurations the model cannot evaluate
        pass
def do_all_vacancies(test_dir, nn_cutoff=0.0, tol=1.0e-2):
    print("doing do_all_vacancies")
    bulk_supercell = ase.io.read(os.path.join(test_dir, "bulk_supercell.xyz"),
                                 format="extxyz")
    print("got bulk_supercell ", len(bulk_supercell))
    bulk = rescale_to_relaxed_bulk(bulk_supercell)
    # relax bulk supercell positions in case it's only approximate (as it must
    # be for different models), but stick to relaxed bulk's lattice constants
    # as set by rescale_to_relaxed_bulk
    bulk_supercell = relax_config(bulk_supercell, relax_pos=True,
                                  relax_cell=False, tol=tol, save_traj=True,
                                  config_label="rescaled_bulk",
                                  from_base_model=True, save_config=True)
    ase.io.write(os.path.join("..", run_root + "-rescaled-bulk.xyz"),
                 bulk_supercell, format='extxyz')

    print("got bulk primitive cell ", bulk.get_cell())
    print("got rescaled bulk_supercell cell ", bulk_supercell.get_cell())

    if bulk_supercell.info['vacancies'] == "inequivalent":
        sym_data = spglib.get_symmetry_dataset(bulk_supercell, symprec=0.01)
        prim_vacancy_list = np.unique(sym_data["equivalent_atoms"])
        print("orig cell vacancy_list", prim_vacancy_list)
        if 'arb_supercell' in bulk_supercell.info:
            print("making bulk supercell from",
                  bulk_supercell.info['arb_supercell'].reshape((3, 3)))
            bulk_supersupercell = ase.build.make_supercell(
                bulk_supercell,
                bulk_supercell.info['arb_supercell'].reshape((3, 3)))
            print("got supersupercell with ", len(bulk_supersupercell),
                  "atoms, cell\n", bulk_supersupercell.get_cell())
            vacancy_list = []
            for i in prim_vacancy_list:
                # find the atom in the big supercell closest to each
                # inequivalent site, using the minimum image convention
                p = bulk_supercell.get_positions()[i]
                dv = bulk_supersupercell.get_positions() - p
                dv_scaled = np.dot(dv,
                                   bulk_supersupercell.get_reciprocal_cell().T)
                dv -= np.dot(np.round(dv_scaled),
                             bulk_supersupercell.get_cell())
                i_closest = np.argmin(np.linalg.norm(dv, axis=1))
                print("found closest in new cell", i_closest,
                      "distance in orig cell lattice coords",
                      np.dot((bulk_supersupercell.get_positions()[i_closest]
                              - p),
                             bulk_supercell.get_reciprocal_cell().T))
                vacancy_list.append(i_closest)
            bulk_supersupercell.info.update(bulk_supercell.info)
            bulk_supercell = bulk_supersupercell
        else:
            vacancy_list = prim_vacancy_list
        print("final vacancy_list", vacancy_list)
    else:
        try:
            vacancy_list = [int(i) for i in bulk_supercell.info['vacancies']]
        except:
            vacancy_list = [int(bulk_supercell.info['vacancies'])]

    evaluate(bulk_supercell)
    bulk_supercell_pe = bulk_supercell.get_potential_energy()

    properties = {
        "bulk_struct_test": bulk_supercell.info["bulk_struct_test"],
        "bulk_E_per_atom": bulk_supercell_pe / len(bulk_supercell),
        "defects": {}
    }

    for vac_i in vacancy_list:
        # maybe set up a system to read these from xyz file?
        try:
            relax_radial = bulk_supercell.info[
                'relax_radial_{}'.format(vac_i)]
        except KeyError:
            relax_radial = 0.0
        try:
            relax_symm_break = bulk_supercell.info[
                'relax_symm_break_{}'.format(vac_i)]
        except KeyError:
            relax_symm_break = 0.0

        (label, unrelaxed_filename, Ef0, relaxed_filename, Ef, vac_Z,
         vac_pos) = do_one_vacancy(bulk_supercell, bulk_supercell_pe, vac_i,
                                   relax_radial, relax_symm_break, nn_cutoff,
                                   tol)
        properties["defects"][label] = {
            'Ef0': Ef0,
            'Ef': Ef,
            'unrelaxed_filename': unrelaxed_filename,
            'relaxed_filename': relaxed_filename,
            'atom_ind': int(vac_i),
            'Z': int(vac_Z),
            'vac_pos': vac_pos.tolist()
        }
        if len(set(bulk_supercell.get_atomic_numbers())) > 1:
            properties["defects"][label]["dmu"] = [1, vac_Z]

    print("returning properties", properties)
    return properties
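# A hypothetical driver sketch: each do_* test above reads bulk_supercell.xyz
# from a test directory and returns a JSON-serializable properties dict. This
# assumes the framework globals (model, run_root, relax_config, evaluate) have
# been set up by the harness; the test directory name is an example.
import json

props = do_all_vacancies("tests/my_bulk_test", nn_cutoff=3.0, tol=1.0e-2)
with open(run_root + "-properties.json", "w") as fout:
    json.dump(props, fout)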