コード例 #1
0
def extract_wrapped(simname, format_spec=['id', 'type', 'xs', 'ys', 'zs']):
    """Extract coordinates of all atoms from wrapped trajectory files with 
    format 'id','type','xs','ys','zs'
    Args:
    simname (str): name of the LAMMPS simulation.

    Returns:
    coord (dict): Dictionary with the number of timesteps as keys and 
                  coordinates of all atoms at the corresponding timestep as 
                  a pandas dataframe. The format of keys is:
                  'timestep_<timestep>'
                  For ex:
                  'timestep_0'
    """
    dump_wrapped, dump_unwrapped, dump_def1,dump_def2,\
        dump_def3, log_file=fileIO.retrieve_different_filetypes(simname)
    path = fileIO.findInSubdirectory(dump_wrapped)
    unwrap = pd.read_csv(path, header=None, index_col=False)
    index = 0
    natoms_notfound = True
    while natoms_notfound:
        try:
            if unwrap.iloc[index].str.split()[0][1] == 'NUMBER':
                natoms_notfound = False
            else:
                index += 1
        except IndexError:
            index += 1
    natoms = int(unwrap.iloc[index + 1].str.split()[0][0])
    natoms = int(unwrap.iloc[index + 1].str.split()[0][0])
    index = 0
    timestep = 0
    coord = {}
    while index < len(unwrap):
        line = unwrap.iloc[index].str.split()
        try:
            if line[0][0] == 'ITEM:' and line[0][1] == 'ATOMS':
                length = len(unwrap.iloc[index + 1].str.split()[0])
                df2 = unwrap.iloc[index + 1:index + natoms + 1]
                df2 = df2[0].str.split(' ', length - 1, expand=True)
                #Split based on separator - expensive
                num2str = lambda x: float(x)
                #convert all elements from str to float
                df2 = df2.applymap(num2str)
                #apply num2str to every element - expensive
                df2.columns = format_spec
                #add corresponding column labels
                df2 = df2.sort_values(by=['id'])
                #sort based on atom id so that future operations are easy.
                key = 'timestep_' + str(timestep)
                #save in the corresponding dictionary.
                coord[key] = df2
                index = index + natoms
                timestep += 1
            else:
                index += 1
        except IndexError:
            index += 1
    return coord
コード例 #2
0
def read_boxsize_generic(simname, nc, dp, last_timestep=173):
    """Reads the box size of the simulation from the unwrapped coordinates 
    trajectory.

    Args:
    simname (str): name of the LAMMPS simulation.
    
    Returns:
    prop_constant (np array): an array  containing the proportionality
                              constant pertaining to the increase in size of
                              the box (along x, y and z) at every timestep.
    """
    dump_wrapped, dump_unwrapped, dump_def1, dump_def2,\
        dump_def3, log_file=fileIO.retrieve_different_filetypes(simname)
    path = fileIO.findInSubdirectory(dump_unwrapped)
    unwrap = pd.read_csv(path, header=None, index_col=False)
    index = 0

    timestep = 1
    prev_index = -(nc * dp + 4)
    next_x_add = nc * dp + 9
    next_y_add = nc * dp + 10
    next_z_add = nc * dp + 11
    ref_size_x = (
        float(unwrap.iloc[prev_index + next_x_add].str.split()[0][1]) -
        float(unwrap.iloc[prev_index + next_x_add].str.split()[0][0]))
    print(ref_size_x)
    ref_size_y = (
        float(unwrap.iloc[prev_index + next_y_add].str.split()[0][1]) -
        float(unwrap.iloc[prev_index + next_y_add].str.split()[0][0]))
    ref_size_z = (
        float(unwrap.iloc[prev_index + next_z_add].str.split()[0][1]) -
        float(unwrap.iloc[prev_index + next_z_add].str.split()[0][0]))
    prop_constant = [None] * last_timestep
    while timestep <= last_timestep:
        curr_size_x = (
            float(unwrap.iloc[prev_index + next_x_add].str.split()[0][1]) -
            float(unwrap.iloc[prev_index +
                              next_x_add].str.split()[0][0])) / ref_size_x
        curr_size_y = (
            float(unwrap.iloc[prev_index + next_y_add].str.split()[0][1]) -
            float(unwrap.iloc[prev_index +
                              next_y_add].str.split()[0][0])) / ref_size_y
        curr_size_z = (
            float(unwrap.iloc[prev_index + next_z_add].str.split()[0][1]) -
            float(unwrap.iloc[prev_index +
                              next_z_add].str.split()[0][0])) / ref_size_z
        px = round(curr_size_x, 2)
        py = round(curr_size_y, 2)
        pz = round(curr_size_z, 2)
        prop_constant[timestep - 1] = [px, py, pz]
        prev_index += next_x_add
        timestep += 1
    return prop_constant
コード例 #3
0
 def test_retrieve_filetypes(self):
     """Asserts file type returns for the given file"""
     rootname = 'test_file'
     dump_wrapped = rootname + '.lammpstrj'
     dump_unwrapped = rootname + '.unwrapped.lammpstrj'
     dump_def1 = rootname + '.def1.txt'
     dump_def2 = rootname + '.def2.txt'
     dump_def3 = rootname + '.def3.txt'
     log_file = 'log.' + rootname + '.txt'
     self.assertEqual(fileIO.retrieve_different_filetypes('test_file'),
                      (dump_wrapped, dump_unwrapped, dump_def1, dump_def2,
                       dump_def3, log_file))
コード例 #4
0
def extract_log_thermo(simname):
    """Read and return the thermodynamic data from a LAMMMPS simulation file.
    Args:
    simname (str): name of the simulation.
    
    Returns:
    log_thermo (dict): Dictionary containing the number of runs in the current
                       file and corresponding pandas dataframe containing all 
                       thermodynamic output regarding that run file.
    """
    dump_wrapped, dump_unwrapped, dump_def1, dump_def2,\
        dump_def3, log_file=fileIO.retrieve_different_filetypes(simname)
    path = fileIO.findInSubdirectory(log_file)
    df3 = pd.read_csv(path,
                      sep='nevergonnahappen',
                      engine='python',
                      index_col=False,
                      header=None)
    natoms = []
    log_thermo = {}
    run_num = 0
    for index in df3.index:
        line = df3.iloc[index].str.split()
        try:
            if line[0][0] == 'Per' and line[0][1] == 'MPI':
                try:
                    float(df3.iloc[index + 3].str.split()[0][0])
                    #To make sure we do not include "run 0" runs.
                    col_names = df3.iloc[index + 1].str.split()[0]
                    run_num += 1
                    #%%%%%%%%%%%%% FIND RUN and THERMO numbers %%%%%%%%%%%%%%%
                    run_index = index - 1
                    while df3.iloc[run_index].str.split()[0][0] != 'run':
                        run_index -= 1
                    run = df3.iloc[run_index].str.split()[0][1]
                    thermo_index = run_index - 1
                    while df3.iloc[thermo_index].str.split()[0][0] != 'thermo':
                        thermo_index -= 1
                    thermo = df3.iloc[thermo_index].str.split()[0][1]
                    output_num = int(int(run) / int(thermo))
                    natoms.append(df3.iloc[index + output_num +
                                           3].str.split()[0][-2])
                    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

                    #%%%%%%%%%%%%%%%% Read thermo data %%%%%%%%%%%%%%%%%%%%%%%
                    df3.iloc[index + 2:index + output_num + 3].to_csv(
                        r'.//test.csv', index=False, header=None)
                    #write dummy
                    # file
                    df2 = pd.read_csv('./test.csv',
                                      delim_whitespace=True,
                                      names=col_names,
                                      dtype=np.float64)
                    #read dummy file
                    key = 'run_' + str(run_num)
                    #save in the corresponding dictionary.
                    log_thermo[key] = df2
                    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
                except ValueError:
                    pass
        except IndexError:
            pass

    return log_thermo
コード例 #5
0
def extract_def(simname, syntax='18f'):
    """
    This function reads the given LAMMPS simulation name (if present somewhere
    in the current directory or subdirectories) and returns a numpy array 
    containing the file's output (log, lammpstrj, def1, def2).
    
    *def* files are usually created by Raiter during deformations. They are 
    clutter-free and contain only numbers. This function will NOT work for log
    files. For log files, please check extract_log_thermo function.
    
    Args:
    simname (str): name of the *def* file
    syntax (str): the format specification of the requested def file. Default
     is '18f'
    
    Returns:
    def1 (dataframe): Pandas dataframe containing the output from the *def1*
                      file.
    def2 (dataframe): Pandas dataframe containing output from the *def2* file.
    """

    dump_wrapped, dump_unwrapped, dump_def1,dump_def2,\
        dump_def3, log_file=fileIO.retrieve_different_filetypes(simname)
    path = fileIO.findInSubdirectory(dump_def1)
    if syntax == '18f':
        column_names = [
            'strain', 'pxx', 'pyy', 'pzz', 'lx', 'ly', 'lz', 'temp', 'epair',
            'ebond', 'eangle', 'edihed', 'ecoul', 'evdwl', 'etotal', 'pe',
            'ke', 'density'
        ]
    elif syntax == 'old':
        column_names = ['']  #FILL IN LATER
    def1 = pd.read_csv(path,
                       delim_whitespace=True,
                       skiprows=1,
                       dtype=np.float64,
                       names=column_names,
                       index_col=False)  #Read and skip the first line
    path = fileIO.findInSubdirectory(dump_def2)
    if syntax == '18f':
        column_names = [
            "strain", "c_meansquarep[1]", "c_meansquarep[2]",
            "c_meansquarep[3]", "c_meansquarep[4]", "c_meansquaren[1]",
            "c_meansquaren[2]", "c_meansquaren[3]", "c_meansquaren[4]",
            "c_meansquarei[1]", "c_meansquarei[2]", "c_meansquarei[3]",
            "c_meansquarei[4]", "c_meansquareu[1]", "c_meansquareu[2]",
            "c_meansquareu[3]", "c_meansquareu[4]", "c_nongaussp[1]",
            "c_nongaussp[2]", "c_nongaussp[3]", "c_nongaussn[1]",
            "c_nongaussn[2]", "c_nongaussn[3]", "c_nongaussi[1]",
            "c_nongaussi[2]", "c_nongaussi[3]", "c_nongaussu[1]",
            "c_nongaussu[2]", "c_nongaussu[3]", "c_msdall[1]", "c_msdall[2]",
            "c_msdall[3]", "c_msdall[4]", "c_ngpall[1]", "c_ngpall[2]",
            "c_ngpall[3]"
        ]
    def2 = pd.read_csv(path,
                       delim_whitespace=True,
                       skiprows=1,
                       dtype=np.float64,
                       names=column_names,
                       index_col=False)
    return def1, def2
コード例 #6
0
def extract_unwrapped(simname,
                      format_spec=['id', 'mol', 'type', 'xu', 'yu', 'zu'],
                      first_only=False,
                      last_only=False,
                      boxsize=False,
                      boxsize_whole=False,
                      start_end=False):
    """Extract coordinates of all atoms from unwrapped trajectory files with 
    format 'id','mol','type','xu','yu','zu'
    Args:
    simname (str): name of the LAMMPS simulation.

    Returns:
    coord (dict): Dictionary with the number of timesteps as keys and 
                  coordinates of all atoms at the corresponding timestep as 
                  a pandas dataframe.
    """

    dump_wrapped, dump_unwrapped, dump_def1,dump_def2,\
        dump_def3, log_file=fileIO.retrieve_different_filetypes(simname)
    path = fileIO.findInSubdirectory(dump_unwrapped)
    unwrap = pd.read_csv(path, header=None, index_col=False)
    index = 0
    natoms_notfound = True
    while natoms_notfound:
        try:
            if unwrap.iloc[index].str.split()[0][1] == 'NUMBER':
                natoms_notfound = False
            else:
                index += 1
        except IndexError:
            index += 1
    natoms = int(unwrap.iloc[index + 1].str.split()[0][0])
    first_timestep_lines = natoms + 9
    index = 0
    timestep = 0
    coord = {}
    if first_only:
        while index < first_timestep_lines:
            line = unwrap.iloc[index].str.split()
            try:
                if line[0][0] == 'ITEM:' and line[0][1] == 'ATOMS':
                    length = len(unwrap.iloc[index + 1].str.split()[0])
                    df2 = unwrap.iloc[index + 1:index + natoms + 1]
                    df2 = df2[0].str.split(' ', length - 1, expand=True)
                    #Split based on separator - expensive
                    num2str = lambda x: float(x)
                    #convert all elements from str to float
                    df2 = df2.applymap(num2str)
                    #apply num2str to every element - expensive
                    df2.columns = format_spec
                    #add corresponding column labels
                    df2 = df2.sort_values(by=['id'])
                    #sort based on atom id so that future operations
                    # are easy.
                    key = 'timestep_' + str(timestep)
                    #save in the corresponding dictionary.
                    coord[key] = df2
                    index = index + natoms
                    timestep += 1
                else:
                    index += 1
            except IndexError:
                index += 1
    elif last_only:
        index = len(unwrap) - natoms - 15  #15 is an arbitrary chosen value
        while index < len(unwrap):
            line = unwrap.iloc[index].str.split()
            try:
                if line[0][0] == 'ITEM:' and line[0][1] == 'ATOMS':
                    length = len(unwrap.iloc[index + 1].str.split()[0])
                    df2 = unwrap.iloc[index + 1:index + natoms + 1]
                    df2 = df2[0].str.split(' ', length - 1, expand=True)
                    #Split based on separator - expensive
                    num2str = lambda x: float(x)
                    #convert all elements from str to float
                    df2 = df2.applymap(num2str)
                    #apply num2str to every element - expensive
                    df2.columns = format_spec
                    #add corresponding column labels
                    df2 = df2.sort_values(by=['id'])
                    #sort based on atom id so that future operations
                    # are easy.
                    key = 'timestep_' + str(timestep)
                    #save in the corresponding dictionary.
                    coord[key] = df2
                    index = index + natoms
                    timestep += 1
                else:
                    index += 1
            except IndexError:
                index += 1

    else:
        while index < len(unwrap):
            line = unwrap.iloc[index].str.split()
            try:
                if line[0][0] == 'ITEM:' and line[0][1] == 'ATOMS':
                    length = len(unwrap.iloc[index + 1].str.split()[0])
                    df2 = unwrap.iloc[index + 1:index + natoms + 1]
                    df2 = df2[0].str.split(' ', length - 1, expand=True)
                    #Split based on separator - expensive
                    num2str = lambda x: float(x)
                    #convert all elements from str to float
                    df2 = df2.applymap(num2str)
                    #apply num2str to every element - expensive
                    df2.columns = format_spec
                    #add corresponding column labels
                    df2 = df2.sort_values(by=['id'])
                    #sort based on atom id so that future operations
                    # are easy.
                    key = 'timestep_' + str(timestep)
                    #save in the corresponding dictionary.
                    coord[key] = df2
                    index = index + natoms
                    timestep += 1
                else:
                    index += 1
            except IndexError:
                index += 1
    if boxsize:
        index = 0
        boxsizev = [None, None, None]
        if first_only:
            while index < first_timestep_lines:
                line = unwrap.iloc[index].str.split()
                try:
                    if line[0][0] == 'ITEM:' and line[0][1] == 'BOX':
                        df2 = unwrap.iloc[index + 1:index + 2]
                        df2 = df2[0].str.split(' ').values
                        boxsizev[0] = float(df2[0][1]) - float(df2[0][0])
                        dfy = unwrap.iloc[index + 2:index + 3]
                        dfy = dfy[0].str.split(' ').values
                        boxsizev[1] = float(dfy[0][1]) - float(dfy[0][0])
                        dfz = unwrap.iloc[index + 3:index + 4]
                        dfz = dfz[0].str.split(' ').values
                        boxsizev[2] = float(dfz[0][1]) - float(dfz[0][0])
                        if start_end:
                            return coord, boxsizev, float(df2[0][1]), float(
                                df2[0][0]), float(dfy[0][1]), float(
                                    dfy[0][0]), float(dfz[0][1]), float(
                                        dfz[0][0])
                        else:
                            return coord, boxsizev
                    else:
                        index += 1
                except IndexError:
                    index += 1
        if last_only:
            index = len(unwrap) - natoms - 20  #15 is an arbitrary chosen value
            while index < len(unwrap):
                line = unwrap.iloc[index].str.split()
                try:
                    if line[0][0] == 'ITEM:' and line[0][1] == 'BOX':
                        df2 = unwrap.iloc[index + 1:index + 2]
                        df2 = df2[0].str.split(' ').values
                        boxsizev[0] = float(df2[0][1]) - float(df2[0][0])
                        dfy = unwrap.iloc[index + 2:index + 3]
                        dfy = dfy[0].str.split(' ').values
                        boxsizev[1] = float(dfy[0][1]) - float(dfy[0][0])
                        dfz = unwrap.iloc[index + 3:index + 4]
                        dfz = dfz[0].str.split(' ').values
                        boxsizev[2] = float(dfz[0][1]) - float(dfz[0][0])
                        return coord, boxsizev
                    else:
                        index += 1
                except IndexError:
                    index += 1
    if boxsize_whole:
        index = 0
        timestep = 0
        boxsize_whole_dict = {}
        boxsizev = [None, None, None]
        while index < len(unwrap):
            line = unwrap.iloc[index].str.split()
            try:
                if line[0][0] == 'ITEM:' and line[0][1] == 'BOX':
                    df2 = unwrap.iloc[index + 1:index + 2]
                    df2 = df2[0].str.split(' ').values
                    boxsizev[0] = float(df2[0][1]) - float(df2[0][0])
                    dfy = unwrap.iloc[index + 2:index + 3]
                    dfy = dfy[0].str.split(' ').values
                    boxsizev[1] = float(dfy[0][1]) - float(dfy[0][0])
                    dfz = unwrap.iloc[index + 3:index + 4]
                    dfz = dfz[0].str.split(' ').values
                    boxsizev[2] = float(dfz[0][1]) - float(dfz[0][0])
                    key = 'timestep_' + str(timestep)
                    boxsize_whole_dict[key] = [
                        boxsizev[0], boxsizev[1], boxsizev[2]
                    ]
                    index = index + natoms
                    timestep += 1
                else:
                    index += 1
            except IndexError:
                index += 1
        return coord, boxsize_whole_dict

    return coord