def __make_one_dataset(self, vasp_dir):
        """Generate the data of one VASP directory and record it in ``self.total_info``.

        The directory is read through ``VASP_DataExtract``. When it holds four
        samples or fewer, its entry is removed from ``self.total_info`` and
        nothing else happens. Otherwise the entry is marked as generated, filled
        with the ``x``/``y`` data returned by ``generate_data()``, and
        ``self.atom_cases`` is extended with the atom cases found.

        Args:
            vasp_dir: Key of ``self.total_info`` and path handed to
                ``VASP_DataExtract``.
        """
        extractor = VASP_DataExtract(vasp_dir=vasp_dir)
        extractor.get_atom_and_position_info()
        space = extractor.get_output_as_atom3Dspace()
        sample_count = len(space.atoms_pos_info)

        # Not having enough samples is quite likely to happen, so the entry is
        # dropped instead of raising.
        if sample_count < 5:
            print_file("No enough samples for %s, which have %s." %
                       (vasp_dir, sample_count))
            del self.total_info[vasp_dir]
            return

        print_file("vasp_dir %s have sample %s" % (vasp_dir, sample_count))

        entry = self.total_info[vasp_dir]
        entry["generated"] = 1

        # x and y are not spatial coordinates here: x holds the coordinates
        # (the model input) and y holds the energies.
        features, energies, found_cases = space.generate_data()
        entry['x'] = features
        entry['y'] = energies

        self.atom_cases = self.atom_cases.union(found_cases)
        print("AtomCases", self.atom_cases)

        # The entry stores the cumulative set collected so far, not only the
        # atom cases of this directory.
        entry['atom_cases'] = self.atom_cases
Exemple #2
0
    def from_vasp_dir_and_energy_list(vasp_dirs,
                                      slab_energy=None,
                                      final_ads_energy=None,
                                      only_x=False,
                                      description=""):
        """Build a ``Dataset`` from VASP directories and one reference energy list.

        Exactly one of ``slab_energy`` / ``final_ads_energy`` must be given
        unless ``only_x`` is set:

        * with ``slab_energy``, y is the energy of every step minus the slab
          energy of that directory;
        * with ``final_ads_energy``, y is the energy of every step minus the
          energy of the final step plus the given final energy.

        Args:
            vasp_dirs: Sequence of VASP directory paths.
            slab_energy: Per-directory slab energies, indexed like
                ``vasp_dirs``.
            final_ads_energy: Per-directory final adsorption energies, indexed
                like ``vasp_dirs``.
            only_x: When truthy, no energies are computed and the dataset is
                built with ``only_x=True``.
            description: Passed through to ``Dataset``.

        Returns:
            The ``Dataset`` built from the collected coordinates, energies and
            box tensors.

        Raises:
            AssertionError: Neither reference energy was given while energies
                are required.
            ValueError: Both reference energies were given.
        """
        # Normalise once so that the energy branch and the flag handed to
        # Dataset can never disagree for non-bool inputs.
        only_x = bool(only_x)
        use_slab_energy = slab_energy is not None

        if not only_x:
            # Raised explicitly (instead of ``assert``) so that the check is
            # not stripped when Python runs with -O.
            if slab_energy is None and final_ads_energy is None:
                raise AssertionError("At least feed one reference energy")
            if slab_energy is not None and final_ads_energy is not None:
                raise ValueError("Can only feed one type of energy!")

        coordinate = []
        energy = []
        box_tensor = []
        print("\nNow extracting data from Vasp ... ")
        for i in tqdm.trange(len(vasp_dirs)):
            vde = VASP_DataExtract(vasp_dirs[i])
            coord, _, _ = vde.get_output_as_atom3Dspace().generate_data()
            energy_info = vde.get_energy_info()
            vde.get_box_tensor_and_type_info()

            # The energy info is read with keys 1..len; flatten it into a
            # plain list in that order.
            step_energy = [
                energy_info[step] for step in range(1, len(energy_info) + 1)
            ]

            coordinate.append(coord)
            box_tensor.append(vde.box_tensor_info)

            if only_x:
                continue
            steps = np.array(step_energy)
            if use_slab_energy:
                energy.append(steps - float(slab_energy[i]))
            else:
                energy.append(steps - float(step_energy[-1]) +
                              float(final_ads_energy[i]))

        return Dataset(coordinate,
                       energy,
                       box_tensor,
                       only_x=only_x,
                       description=description)
Exemple #3
0
    def generate_vasp_dir_energy_table(vasp_dir, to_csv=False):
        """Write a template table listing every VASP dir with a slab energy of 0.

        Given a directory that contains many VASP directories (for example a
        directory named Vasp containing Pt_OH1, Pt_OH2, Pt_OH3 ...), a table
        like the following is written to the current working directory:

            Vasp Dirs | slab energy
            Pt_OH1    | 0
            Pt_OH2    | 0
            Pt_OH3    | 0

        The slab energy column is a placeholder that has to be filled in
        afterwards.

        Args:
            vasp_dir: Directory searched with
                ``VASP_DataExtract.get_all_VASP_dirs_in_dir``.
            to_csv: Write ``vasp_dir_path_energy_table.csv`` when truthy,
                otherwise ``vasp_dir_path_energy_table.xlsx``.
        """
        vasp_dirs = list(VASP_DataExtract.get_all_VASP_dirs_in_dir(vasp_dir))

        # Build the frame column by column: stacking both lists into a single
        # numpy array would coerce the numeric placeholder to strings.
        data = pd.DataFrame({
            "Vasp Dirs": vasp_dirs,
            "slab energy": [0] * len(vasp_dirs),
        })

        if to_csv:
            data.to_csv("vasp_dir_path_energy_table.csv", index=False)
        else:
            data.to_excel("vasp_dir_path_energy_table.xlsx", index=False)
Exemple #4
0
from VDE.VASPMoleculeFeature import VASP_DataExtract
from soapml.SOAPTransformer import SOAPTransformer

# Smoke test: SOAP-encode the last frame of one VASP run around the origin,
# print the result and stop.
test = VASP_DataExtract("/home/yb/Desktop/Dataset/碳纳米管掺杂/整理后/5-5/b/h/3")
a = test.get_output_as_atom3Dspace()
coord, energy, atom_cases = a.generate_data()

# Atom cases to encode: those found in the data, plus 5, 7 and 8,
# minus 1 (H). 6 (C) is left in; call atom_cases.remove(6) to drop it too.
atom_cases = set(atom_cases)
atom_cases |= {5, 7, 8}
atom_cases.remove(1)
transformer = SOAPTransformer(encode_atom_cases=atom_cases)

for i in range(coord.shape[0]):
    # Only the last frame is looked at, and the script exits after the
    # first pass through the loop.
    sample = coord[-1, :, :]
    x = transformer.transform(sample, [[0, 0, 0]])
    print(x.shape)
    print(x[..., 0])
    print(Ellipsis)
    exit()
Exemple #5
0
            np.save(save_file, self.total_encoded_grid)
        return self.total_encoded_grid

    def debug_encoded_grid(self, file_name="encoded_grid.npy"):
        """Print four probe entries of the array stored in ``file_name``.

        The entries are taken at the first position, then with the third,
        the second and finally the first axis moved to its last index.

        Args:
            file_name: Path given to ``np.load``.
        """
        grid = np.load(file_name)
        print(grid[0, 0, 0])
        print(grid[0, 0, -1])
        print(grid[0, -1, -1])
        print(grid[-1, -1, -1])

    def load_encoded_grid(self, file_name):
        """Return whatever ``np.load`` reads from ``file_name``."""
        grid = np.load(file_name)
        return grid


if __name__ == '__main__':
    a = VASP_DataExtract(vasp_dir="data\Pd_CH_s_fcc_stand.cif")
    c = a.get_output_as_atom3Dspace()
    e = a.get_energy_info()

    coord, energy, atom_case = c.generate_data()

    # 1 得到坐标的atom case的信息,输入Vasp dir
    # print(coord)
    a = AtomGrid(coord, atom_case)

    # 2 得到边界信息,然后根据边界人为确定格子大小和边界
    a.get_grid_border()
    a.make_grid(minX=-6,
                maxX=15,
                minY=-6,
                maxY=15,
Exemple #6
0
def encode_method_test():
    """Print the output of both grid encoders for one sample, and their difference.

    The structure in ``data\\Pd_CH_s_fcc_stand.cif`` is loaded, a 100x100x100
    grid is set up and sample index 1 is encoded with both
    ``AtomGrid.grid_encode`` and ``AtomGrid.grid_encode1``. Both encodings,
    their element-wise difference and the sum of that difference are printed
    so the two methods can be compared by eye.
    """
    # Raw string: "\P" is not a valid escape sequence, and newer Python
    # versions warn about it in a normal string literal.
    extractor = VASP_DataExtract(vasp_dir=r"data\Pd_CH_s_fcc_stand.cif")
    space = extractor.get_output_as_atom3Dspace()
    # The result is not needed here; the call is kept in case it has side
    # effects on the extractor (not visible from this function).
    extractor.get_energy_info()

    coord, _, atom_case = space.generate_data()

    grid = AtomGrid(coord, atom_case)
    grid.get_grid_border()
    grid.make_grid(minX=-0.5,
                   maxX=7,
                   minY=-3,
                   maxY=7.5,
                   minZ=-0.5,
                   maxZ=9.5,
                   resolutionX=100,
                   resolutionY=100,
                   resolutionZ=100)

    train_index = [1]

    en2 = grid.grid_encode(train_index)[0]
    en1 = grid.grid_encode1(train_index)[0]

    # The en2 method is much faster than en1; the printed difference shows
    # whether the two encodings agree.
    difference = en1 - en2
    print(en1)
    print("_______________________")
    print(en2)
    print(difference)
    print(np.sum(difference))
Exemple #7
0
def make_dataset(vasp_dir, atom_case, save=False, max_dataset_number=-1):
    """Grid-encode one VASP directory and split it into train and test sets.

    Args:
        vasp_dir: Path of the VASP directory to read.
        atom_case: Atom cases handed to ``AtomGrid``; must contain every atom
            case found in the directory.
        save: When equal to ``False`` the datasets are returned. Otherwise
            they are written to ``<name>_trainX.npy``, ``<name>_testX.npy``,
            ``<name>_trainY.npy`` and ``<name>_testY.npy`` (``<name>`` being
            the last path component of ``vasp_dir``) and nothing is returned.
        max_dataset_number: Use at most this many leading samples. A negative
            value (the default) or ``None`` means no limit.

    Returns:
        ``(trainX, trainY, testX, testY)`` when ``save`` equals ``False``,
        otherwise ``None``.

    Raises:
        ValueError: The data contains an atom case missing from
            ``atom_case``.
    """
    extractor = VASP_DataExtract(vasp_dir=vasp_dir)

    # Last path component, accepting both "/" and "\" as separators.
    vasp_dir_name = vasp_dir.split("/")[-1].split('\\')[-1]

    space = extractor.get_output_as_atom3Dspace()
    # The result is not needed here; the call is kept in case it has side
    # effects on the extractor (not visible from this function).
    extractor.get_energy_info()

    coord, energy, _atom_case = space.generate_data()

    missing = [case for case in _atom_case if case not in atom_case]
    if missing:
        # Wrapped in a tuple so the message formats correctly whatever the
        # container type is.
        raise ValueError(
            "Input Atom Case Didn't Contain %s, make sure all atom cases are in atom_case"
            % (missing, ))

    # 1. Build the grid from the coordinates and the requested atom cases.
    grid = AtomGrid(coord, atom_case)

    # 2. Get the border information; the grid size and bounds below were
    #    then chosen by hand.
    grid.get_grid_border()
    grid.make_grid(minX=-6,
                   maxX=15,
                   minY=-6,
                   maxY=15,
                   minZ=-6,
                   maxZ=15,
                   resolutionX=60,
                   resolutionY=60,
                   resolutionZ=60)

    # 3. Pick the sample indices, shuffle them, split into train/test and
    #    encode (and optionally store) each part.
    all_index = list(range(coord.shape[0]))
    # Only a non-negative limit truncates: slicing with the -1 default would
    # silently drop the last sample.
    if max_dataset_number is not None and max_dataset_number >= 0:
        all_index = all_index[:max_dataset_number]
    np.random.shuffle(all_index)
    print(all_index)

    train_test_ratio = 0.7
    split_at = int(len(all_index) * train_test_ratio)
    train_index = all_index[:split_at]
    test_index = all_index[split_at:]

    # Same comparison as before so non-bool values of ``save`` keep their
    # previous meaning.
    persist = save != False  # noqa: E712

    trainX = testX = None
    if persist:
        grid.grid_encode(train_index, "%s_trainX.npy" % vasp_dir_name)
        grid.grid_encode(test_index, "%s_testX.npy" % vasp_dir_name)
    else:
        trainX = grid.grid_encode(train_index, save=False)
        testX = grid.grid_encode(test_index, save=False)

    trainY = np.array([energy[i] for i in train_index])
    testY = np.array([energy[i] for i in test_index])

    if persist:
        np.save("%s_trainY.npy" % vasp_dir_name, trainY)
        np.save("%s_testY.npy" % vasp_dir_name, testY)
        return None
    return trainX, trainY, testX, testY
Exemple #8
0

if __name__ == '__main__':

    # TODO: 增加交替训练,这很重要!!
    all_true_y = []
    all_pred_y = []

    atom_case = (1, 6, 8, 78)

    np.random.seed(1)

    a = GridConv3D(60, 60, 60, 4)  # 四种元素,先训练CHOPt都有的
    a.build()

    all_vasp_dir = VASP_DataExtract.get_all_VASP_dirs_in_dir(
        os.getcwd())  # 获得当前的VASP文件夹作为训练集
    index = 0
    for vasp_dir in all_vasp_dir:
        index += 1
        print("train for %s" % vasp_dir)
        print("Total Process: %s/%s" % (index, len(all_vasp_dir)))
        # 选择不保存
        trainX, trainY, testX, testY = make_dataset(
            vasp_dir, save=False, max_dataset_number=50,
            atom_case=atom_case)  # 最多50个sample拿来训练
        # 如果保存可以载入
        #trainX,trainY,testX,testY = load_dataset(vasp_dir)

        a.fit(trainX, trainY, epochs=1, batch_size=1)

    for vasp_dir in all_vasp_dir: