def encode_method_test():
    """Print the output of the two grid encoders for one sample, plus their difference.

    Loads the bundled ``Pd_CH_s_fcc_stand.cif`` data, builds an ``AtomGrid``
    over a fixed box with 100 cells per axis, encodes sample index 1 with both
    ``grid_encode`` and ``grid_encode1`` and prints each result, their
    element-wise difference and the sum of that difference.

    Nothing is returned or asserted; the comparison is by visual inspection.
    """
    # Raw string: "\P" is not a valid escape sequence, so the non-raw literal
    # triggers a SyntaxWarning on recent Python versions. The value is unchanged.
    extractor = VASP_DataExtract(vasp_dir=r"data\Pd_CH_s_fcc_stand.cif")
    space = extractor.get_output_as_atom3Dspace()
    # The return value was never used; the call is kept in case it populates
    # state on the extractor (its implementation is not visible here).
    extractor.get_energy_info()
    coord, _energy, atom_case = space.generate_data()

    grid = AtomGrid(coord, atom_case)
    grid.get_grid_border()
    grid.make_grid(minX=-0.5, maxX=7,
                   minY=-3, maxY=7.5,
                   minZ=-0.5, maxZ=9.5,
                   resolutionX=100, resolutionY=100, resolutionZ=100)

    train_index = [1]
    en2 = grid.grid_encode(train_index)[0]
    en1 = grid.grid_encode1(train_index)[0]

    # Per the original author's note: the en2 method is far faster than en1,
    # so the two encodings are printed side by side to check that they agree.
    print(en1)
    print("_______________________")
    print(en2)
    difference = en1 - en2
    print(difference)
    # NOTE(review): a plain sum can hide differences of opposite sign.
    print(np.sum(difference))
def from_vasp_dir_and_energy_list(vasp_dirs, slab_energy=None, final_ads_energy=None, only_x=False, description=""):
    """Build a ``Dataset`` from a list of VASP directories and reference energies.

    For every directory the coordinates, per-step energies and box tensor are
    read through ``VASP_DataExtract``. Unless ``only_x`` is set, the per-step
    energies are shifted by exactly one kind of reference energy:

    * ``slab_energy``: y = energy of every step - slab energy
    * ``final_ads_energy``: y = energy of every step - energy of the final
      step + final energy

    Args:
        vasp_dirs: Sequence of VASP directories to read.
        slab_energy: Per-directory slab energies, indexed like ``vasp_dirs``.
        final_ads_energy: Per-directory final adsorption energies, indexed
            like ``vasp_dirs``.
        only_x: When truthy, no reference energy is needed and the energy
            list handed to ``Dataset`` stays empty.
        description: Passed through to ``Dataset``.

    Returns:
        A ``Dataset`` built from the coordinates, energies and box tensors.

    Raises:
        ValueError: If energies are required and none, or both, of the
            reference energies are given.
    """
    x_only = bool(only_x)
    use_slab_energy = slab_energy is not None
    if not x_only:
        # Raised explicitly instead of asserted: assertions are removed when
        # Python runs with -O, which would let the missing reference surface
        # later as an unrelated TypeError.
        if slab_energy is None and final_ads_energy is None:
            raise ValueError("At least feed one reference energy")
        if use_slab_energy and final_ads_energy is not None:
            raise ValueError("Can only feed one type of energy!")

    coordinate = []
    energy = []
    box_tensor = []
    print("\nNow extracting data from Vasp ... ")
    for i in tqdm.trange(len(vasp_dirs)):
        vde = VASP_DataExtract(vasp_dirs[i])
        coord, _, _ = vde.get_output_as_atom3Dspace().generate_data()
        energy_info = vde.get_energy_info()
        vde.get_box_tensor_and_type_info()
        dir_box_tensor = vde.box_tensor_info

        # The energy info is read with keys 1..len (presumably a mapping keyed
        # by 1-based step number - confirm); flatten it to an ordered list.
        step_energies = [energy_info[step + 1] for step in range(len(energy_info))]

        coordinate.append(coord)
        if not x_only:
            if use_slab_energy:
                energy.append(np.array(step_energies) - float(slab_energy[i]))
            else:
                energy.append(
                    np.array(step_energies) - float(step_energies[-1]) + float(final_ads_energy[i]))
        box_tensor.append(dir_box_tensor)

    return Dataset(coordinate, energy, box_tensor, only_x=x_only, description=description)
def __make_one_dataset(self, vasp_dir):
    """Generate the data for one VASP directory and record it in ``self.total_info``.

    Directories whose extracted trajectory has four samples or fewer are
    reported and removed from ``self.total_info``. Otherwise the entry is
    marked as generated, its 'x' and 'y' fields are filled from
    ``generate_data()`` and the atom cases found are merged into
    ``self.atom_cases``.
    """
    extractor = VASP_DataExtract(vasp_dir=vasp_dir)
    extractor.get_atom_and_position_info()
    space = extractor.get_output_as_atom3Dspace()

    sample_count = len(space.atoms_pos_info)
    if sample_count <= 4:
        # Too few samples, which is quite likely to happen.
        print_file("No enough samples for %s, which have %s." % (vasp_dir, sample_count))
        del self.total_info[vasp_dir]
        return

    print_file("vasp_dir %s have sample %s" % (vasp_dir, sample_count))
    self.total_info[vasp_dir]["generated"] = 1

    # 'x' and 'y' here are not spatial axes: 'x' holds the coordinates and
    # 'y' holds the energies.
    coordinates, energies, atom_cases = space.generate_data()
    self.total_info[vasp_dir]['x'] = coordinates
    self.total_info[vasp_dir]['y'] = energies

    self.atom_cases = self.atom_cases.union(atom_cases)
    print("AtomCases", self.atom_cases)
    # The entry stores the running union over all directories processed so
    # far, not only the atom cases of this directory.
    self.total_info[vasp_dir]['atom_cases'] = self.atom_cases
from VDE.VASPMoleculeFeature import VASP_DataExtract
from soapml.SOAPTransformer import SOAPTransformer

# Ad-hoc check script: read one VASP directory, build a SOAPTransformer for
# an adjusted set of atom cases, transform one frame at the origin and print
# the result.
# NOTE(review): the original line breaks and indentation of this script were
# lost; the extent of the loop body below (in particular whether exit() is
# inside the loop) is reconstructed and should be confirmed.
test = VASP_DataExtract("/home/yb/Desktop/Dataset/碳纳米管掺杂/整理后/5-5/b/h/3")
a = test.get_output_as_atom3Dspace()
coord, energy, atom_cases = a.generate_data()
atom_cases = set(atom_cases)
# NOTE(review): three values are added although the note names only B and N;
# if these are atomic numbers, 8 would be O - confirm.
atom_cases.update([5, 7, 8]) # add B and N
atom_cases.remove(1) # remove H
#atom_cases.remove(6) # remove C
transformer = SOAPTransformer(encode_atom_cases=atom_cases)
for i in range(coord.shape[0]):
    # NOTE(review): the loop index i is never used; every iteration takes the
    # last frame (coord[-1]). Confirm whether coord[i] was intended.
    sample = coord[-1, :, :]
    # Transform the frame with a single centre position at the origin.
    x = transformer.transform(sample, [[0, 0, 0]])
    print(x.shape)
    print(x[..., 0])
    print(...)
    exit()
return self.total_encoded_grid

# NOTE(review): the return statement above is the tail of a method whose
# beginning is outside this view. The original indentation of this chunk was
# lost, so the nesting of the definitions below is not reproduced here.
def debug_encoded_grid(self, file_name="encoded_grid.npy"):
    """Load ``file_name`` with ``np.load`` and print four corner entries.

    The entries printed are, along the first three axes: [0, 0, 0],
    [0, 0, last], [0, last, last] and [last, last, last].
    """
    temp = np.load(file_name)
    print(temp[0, 0, 0])
    print(temp[0, 0, temp.shape[2] - 1])
    print(temp[0, temp.shape[1] - 1, temp.shape[2] - 1])
    print(temp[temp.shape[0] - 1, temp.shape[1] - 1, temp.shape[2] - 1])

def load_encoded_grid(self, file_name):
    """Return the result of ``np.load(file_name)``."""
    return np.load(file_name)

# Script entry point. NOTE(review): this block continues past the end of the
# visible chunk; the final call below is incomplete from this view.
if __name__ == '__main__':
    a = VASP_DataExtract(vasp_dir="data\Pd_CH_s_fcc_stand.cif")
    c = a.get_output_as_atom3Dspace()
    e = a.get_energy_info()
    coord, energy, atom_case = c.generate_data()
    # 1 Get the coordinates and the atom-case information; the input is the VASP dir
    # print(coord)
    a = AtomGrid(coord, atom_case)
    # 2 Get the border information, then choose the grid size and bounds by hand based on it
    a.get_grid_border()
    a.make_grid(minX=-6, maxX=15, minY=-6, maxY=15, minZ=-6,
def make_dataset(vasp_dir, atom_case, save=False, max_dataset_number=-1):
    """Grid-encode one VASP directory and split it into train and test sets.

    The samples are encoded on a fixed grid (-6 to 15 on every axis, 60 cells
    per axis), their indices are shuffled with ``np.random.shuffle`` and then
    split 70 % / 30 % into train and test.

    Args:
        vasp_dir: VASP directory to read. Its last path component (``/`` or
            ``\\`` separated) is used as the prefix of saved file names.
        atom_case: Atom cases to encode; must contain every atom case found
            in the data.
        save: When truthy, the four arrays are written to
            ``<name>_trainX.npy``, ``<name>_testX.npy``, ``<name>_trainY.npy``
            and ``<name>_testY.npy`` and nothing is returned.
        max_dataset_number: Maximum number of samples to use, taken from the
            start before shuffling. A negative value or ``None`` means no
            limit.

    Returns:
        ``(trainX, trainY, testX, testY)`` when ``save`` is falsy, otherwise
        ``None``.

    Raises:
        ValueError: If the data contains an atom case missing from
            ``atom_case``.
    """
    extractor = VASP_DataExtract(vasp_dir=vasp_dir)
    vasp_dir_name = vasp_dir.split("/")[-1].split('\\')[-1]
    space = extractor.get_output_as_atom3Dspace()
    # The return value was never used; the call is kept in case it populates
    # state on the extractor (its implementation is not visible here).
    extractor.get_energy_info()
    coord, energy, found_atom_cases = space.generate_data()

    # Report the cases that are actually missing. The previous message
    # formatted the whole collection found in the data instead.
    missing_cases = [case for case in found_atom_cases if case not in atom_case]
    if missing_cases:
        raise ValueError(
            "Input Atom Case Didn't Contain %s, make sure all atom cases are in atom_case"
            % (missing_cases,))

    # 1 Coordinates and atom-case information come from the VASP dir above.
    grid = AtomGrid(coord, atom_case)
    # 2 Get the border information; the grid size and bounds are then chosen by hand.
    grid.get_grid_border()
    grid.make_grid(minX=-6, maxX=15,
                   minY=-6, maxY=15,
                   minZ=-6, maxZ=15,
                   resolutionX=60, resolutionY=60, resolutionZ=60)

    # 3 Encode: pick the sample indices, shuffle, split, then store or return
    #   the resulting datasets.
    all_index = list(range(coord.shape[0]))
    # The default of -1 used to be applied as the slice [:-1], which silently
    # dropped the last sample. Negative or None now means "use everything".
    if max_dataset_number is not None and max_dataset_number >= 0:
        all_index = all_index[:max_dataset_number]
    np.random.shuffle(all_index)
    print(all_index)

    train_test_ratio = 0.7
    split_at = int(len(all_index) * train_test_ratio)
    train_index = all_index[:split_at]
    test_index = all_index[split_at:]

    if save:
        # The second positional argument is presumably the output file name
        # used by grid_encode - confirm against AtomGrid.grid_encode.
        grid.grid_encode(train_index, "%s_trainX.npy" % vasp_dir_name)
        grid.grid_encode(test_index, "%s_testX.npy" % vasp_dir_name)
        trainX = testX = None
    else:
        trainX = grid.grid_encode(train_index, save=False)
        testX = grid.grid_encode(test_index, save=False)

    trainY = np.array([energy[i] for i in train_index])
    testY = np.array([energy[i] for i in test_index])

    if save:
        np.save("%s_trainY.npy" % vasp_dir_name, trainY)
        np.save("%s_testY.npy" % vasp_dir_name, testY)
        return None
    return trainX, trainY, testX, testY