def read_data(address, struct_num):
    """Collect structure codes and relative energies from a folder of runs.

    Every entry returned by ``os.listdir(address)`` is treated as a structure
    directory containing a ``POSCAR`` and an ``OSZICAR`` file.  For each one
    the structure code from ``utils.struc_code`` is stored as a sample and the
    energy from ``utils.read_oszicar`` is shifted by
    ``eng_NiS * len(atoms) / 2`` (``eng_NiS`` is a module-level value).

    Args:
        address: Directory to scan.  It must support the ``/`` operator
            (e.g. a ``pathlib.Path``).
        struct_num: Maximum number of structures to read.  A value of zero
            or less disables the limit.

    Returns:
        A tuple ``(x, y)`` where ``x`` is the transposed array of structure
        codes and ``y`` is the array of shifted energies.
    """
    xlist = []
    ylist = []
    for count, entry in enumerate(os.listdir(address), start=1):
        # Check the limit before reporting progress so the message never
        # counts an entry that is not going to be read.
        if 0 < struct_num < count:
            break
        if count % 10 == 0:
            print("number of structures read: %d" % count, end="\n")

        poscar = address / entry / "POSCAR"
        oszicar = address / entry / "OSZICAR"

        atoms, cell = utils.read_poscar(poscar)
        atoms = utils.CN(atoms, cell)
        # NOTE(review): presumably two atoms per NiS formula unit, making this
        # the number of formula units -- confirm.
        nis_num = len(atoms) / 2

        xlist.append(utils.struc_code(atoms))
        eng = utils.read_oszicar(oszicar)
        ylist.append(eng - eng_NiS * nis_num)

    # Transposed, so the samples run along the last axis of ``x``.
    x = np.array(xlist).T
    y = np.array(ylist)

    return x, y
# Gather one (x, y) example per entry under `address` whose OSZICAR exists and
# reports a non-zero energy.
xlist, ylist = [], []
for dir_path in os.listdir(address):
    counter += 1
    print("Structure %s" % dir_path, end="\n")
    struct_dir = address / dir_path
    oszicar = struct_dir / "OSZICAR"

    # Nothing to read for entries that have no OSZICAR file.
    if not os.path.isfile(oszicar):
        continue

    # An energy of exactly 0.0 is taken to mean the simulation has not
    # finished, so such entries are skipped.
    eng = utils.read_oszicar(oszicar)
    if eng == 0.0:
        continue

    poscar = struct_dir / "POSCAR"
    atoms, cell = utils.read_poscar(poscar)
    atom_num = len(atoms)
    atoms = utils.CN(atoms, cell)
    atoms = utils.steinhardt(atoms, cell, 2.55, [4, 6, 8, 10])

    x, y = sinle_example(atoms, eng)
    xlist.append(x)
    ylist.append(y)

# Stack the per-structure examples into arrays.
x = np.array(xlist)
y = np.array(ylist)

# Quick sanity report on the arrays that are about to be written.
print(x.shape)
print(y.shape)
print(np.max(x))

# Write both arrays to the HDF5 file.  The context manager closes the file
# even if one of the dataset writes raises part-way through.
with h5py.File('old_datasets/surface_2D.h5', 'w') as h5f:
    h5f.create_dataset('x', data=x)
    h5f.create_dataset('y', data=y)
    command = 'source /opt/LAMMPS/lammps-intel.sh; lmp_intel < ' + str(
        address) + '/in.temp > ' + str(address) + '/out.lmp'
    os.system(command)
    # os.system('lmp_intel < in.temp')


# Visit every entry under `address`, read its POSCAR and work out the path of
# its GEOM_OPT sub-directory.
# NOTE(review): `dir` shadows the builtin of the same name; it is left as-is
# because the rest of this loop may refer to it.
for dir in os.listdir(address):
    print(dir)
    # Path of the structure file for this entry.
    poscar = address / dir / "POSCAR"
    # potcar = address / dir / "POTCAR"
    # incar = address / dir / "INCAR"
    # kpoints = address / dir / "KPOINTS"
    # Read the atoms and cell from the original POSCAR file.
    atoms, cell = utils.read_poscar(poscar)
    # NOTE(review): `utils.CN` presumably annotates the atoms with
    # coordination numbers -- confirm against utils.
    atoms = utils.CN(atoms, cell)
    # GEOM_OPT directory, intended for geometry-optimising the structure with
    # LAMMPS. Only the path is built here; the directory is not created.
    geom_opt_dir = address / dir / "GEOM_OPT"
    # if os.path.isdir(geom_opt_dir):
    #     command = 'rm -rf ' + str(geom_opt_dir)
    #     os.system(command)
    #
    # os.mkdir(geom_opt_dir)
    # # copy VASP files to the GEOM_OPT directory
    # cp_command = 'cp ' + str(incar) + ' ' + str(geom_opt_dir)
    # os.system(cp_command)
    # cp_command = 'cp ' + str(poscar) + ' ' + str(geom_opt_dir)
    # os.system(cp_command)
    # cp_command = 'cp ' + str(potcar) + ' ' + str(geom_opt_dir)
    # os.system(cp_command)