def create_dataframe_multi_index(inp_path, section='CURVES'):
    """
    Read a section of an INP file into a dataframe indexed by Name and Type.

    Each line of the extracted section text is split on whitespace. Lines
    with exactly three tokens get ``None`` inserted as the second field so
    they line up with four-token lines; lines with any other token count
    are dropped.

    :param inp_path: path to the INP file
    :param section: section header to read (braces are stripped and the
        name is upper-cased before lookup)
    :return: dataframe indexed by ``['Name', 'Type']``, or an empty
        dataframe (after emitting a warning) if the section is not found
    """
    # normalise the requested header so it matches the keys of the details mapping
    section_key = remove_braces(section).upper()

    # every known section header doubles as a possible end-of-section flag
    section_details = get_inp_sections_details(inp_path, include_brackets=False)
    if section_key not in section_details:
        warnings.warn(f'{section_key} section not found in {inp_path}')
        return pd.DataFrame()

    # pull the raw text that belongs to the requested section
    section_text = extract_section_of_file(
        inp_path,
        format_inp_section_header(section),
        [format_inp_section_header(name) for name in section_details.keys()],
    )
    column_names = section_details[section_key]['columns']

    rows = []
    for raw_line in StringIO(section_text):
        tokens = raw_line.strip().split()
        if len(tokens) == 3:
            # no second field on this row: pad it so the row has four entries
            first, third, fourth = tokens
            tokens = [first, None, third, fourth]
        if len(tokens) == 4:
            rows.append(tokens)

    # NOTE(review): assumes the section's column list has four names that
    # include 'Name' and 'Type' -- confirm against the section definitions
    frame = pd.DataFrame(data=rows, columns=column_names)
    return frame.set_index(['Name', 'Type'])
def dataframe_from_inp(inp_path, section, additional_cols=None, quote_replace=' ', **kwargs):
    """
    Create a dataframe from a section of an INP file.

    :param inp_path: path to the INP file
    :param section: section header to read (braces are stripped and the
        name is upper-cased before lookup)
    :param additional_cols: optional list of extra column names appended to
        the section's own column names when parsing
    :param quote_replace: string substituted for every occurrence of two
        consecutive double quotes in the section text
    :param kwargs: forwarded unchanged to ``extract_section_of_file``
    :return: dataframe of the section contents, or an empty dataframe
        (after emitting a warning) if the section is not found
    :raises IndexError: re-raised from ``pd.read_csv`` after printing the
        columns and the head of the section text
    """
    # format the section header for look up in the headers mapping
    sect = remove_braces(section).upper()

    # get all section headers in the inp to use as section ending flags
    headers = get_inp_sections_details(inp_path, include_brackets=False)

    if sect not in headers:
        warnings.warn(f'{sect} section not found in {inp_path}')
        return pd.DataFrame()

    # extract the section text
    start_string = format_inp_section_header(section)
    end_strings = [format_inp_section_header(h) for h in headers.keys()]
    s = extract_section_of_file(inp_path, start_string, end_strings, **kwargs)

    # replace occurrences of double quotes ""
    s = s.replace('""', quote_replace)

    # columns used for parsing this section, plus any additional columns
    # needed for special cases (build instructions)
    additional_cols = [] if additional_cols is None else additional_cols
    section_cols = headers[sect]['columns']
    cols = section_cols + additional_cols

    if section_cols[0] == 'blob':
        # return the whole row, without specific col headers; pandas'
        # default (comma) separator is used, so the deprecated
        # delim_whitespace keyword is not passed at all
        return pd.read_csv(StringIO(s))

    try:
        # sep=r'\s+' is the documented replacement for the deprecated
        # delim_whitespace=True; the first line of the text is skipped and
        # the first column becomes the index
        df = pd.read_csv(
            StringIO(s),
            header=None,
            sep=r'\s+',
            skiprows=[0],
            index_col=0,
            names=cols,
        )
    except IndexError:
        print(
            f'failed to parse {section} with cols: {cols}. head:\n{s[:500]}'
        )
        raise

    return df