Beispiel #1
0
def create_dataframe_multi_index(inp_path, section='CURVES'):
    """
    Read a section of an INP file into a dataframe indexed by Name and Type.

    Each line of the section is split on whitespace. Lines with three
    tokens are treated as rows whose Type is omitted (None is inserted
    as the second value); lines with four tokens are kept unchanged;
    every other line is ignored.

    :param inp_path: path to the INP file
    :param section: section name to read (defaults to 'CURVES')
    :return: pd.DataFrame indexed by ['Name', 'Type']; an empty dataframe
        (plus a warning) when the section is not present in the file
    """

    # normalized section name, used as the key into the headers OrderedDict
    section_key = remove_braces(section).upper()

    # every section found in the inp; their headers mark where a section ends
    all_sections = get_inp_sections_details(inp_path, include_brackets=False)

    if section_key not in all_sections:
        warnings.warn(f'{section_key} section not found in {inp_path}')
        return pd.DataFrame()

    # pull the raw text of the requested section out of the file
    section_tag = format_inp_section_header(section)
    stop_tags = [format_inp_section_header(name) for name in all_sections]
    section_text = extract_section_of_file(inp_path, section_tag, stop_tags)
    column_names = all_sections[section_key]['columns']

    rows = []
    for raw_line in StringIO(section_text):
        tokens = raw_line.split()
        if len(tokens) == 3:
            # Type is missing on this row: pad it so all rows have 4 fields
            tokens = [tokens[0], None, tokens[1], tokens[2]]
        if len(tokens) == 4:
            rows.append(tokens)

    frame = pd.DataFrame(data=rows, columns=column_names)
    return frame.set_index(['Name', 'Type'])
Beispiel #2
0
def dataframe_from_inp(inp_path,
                       section,
                       additional_cols=None,
                       quote_replace=' ',
                       **kwargs):
    """
    create a dataframe from a section of an INP file
    :param inp_path: path to the INP file
    :param section: name of the section to read; it is normalized with
        remove_braces()/upper() before being looked up
    :param additional_cols: optional list of extra column names appended to
        the section's default columns (e.g. for build instructions)
    :param quote_replace: string substituted for every occurrence of two
        consecutive double quotes ("") in the section text
    :param kwargs: forwarded unchanged to extract_section_of_file
    :return: pd.DataFrame of the section; an empty dataframe (plus a
        warning) when the section is not present in the file
    """

    # format the section header for look up in headers OrderedDict
    sect = remove_braces(section).upper()

    # get list of all section headers in inp to use as section ending flags
    headers = get_inp_sections_details(inp_path, include_brackets=False)

    if sect not in headers:
        warnings.warn(f'{sect} section not found in {inp_path}')
        return pd.DataFrame()

    # extract the string and read into a dataframe
    start_string = format_inp_section_header(section)
    end_strings = [format_inp_section_header(h) for h in headers]
    s = extract_section_of_file(inp_path, start_string, end_strings, **kwargs)

    # replace occurrences of double quotes ""
    s = s.replace('""', quote_replace)

    # and get the list of columns to use for parsing this section
    # add any additional columns needed for special cases (build instructions)
    additional_cols = [] if additional_cols is None else additional_cols
    cols = headers[sect]['columns'] + additional_cols

    if headers[sect]['columns'][0] == 'blob':
        # return the whole row, without specific col headers
        # (default comma separator; the deprecated delim_whitespace keyword
        # is no longer passed, even as False, to avoid a FutureWarning)
        return pd.read_csv(StringIO(s))

    try:
        # sep=r'\s+' is the documented replacement for the deprecated
        # delim_whitespace=True
        df = pd.read_csv(StringIO(s),
                         header=None,
                         sep=r'\s+',
                         skiprows=[0],
                         index_col=0,
                         names=cols)
    except IndexError:
        # show what was being parsed before propagating the error
        print(
            f'failed to parse {section} with cols: {cols}. head:\n{s[:500]}'
        )
        raise

    return df
Beispiel #3
0
def test_format_inp_section_header():
    """
    format_inp_section_header should return the upper-cased section name
    wrapped in one pair of square brackets, whether or not the input
    already had brackets or was lower case.
    """

    # (input header, expected formatted header)
    expectations = (
        ('[CONDUITS]', '[CONDUITS]'),
        ('[conduits]', '[CONDUITS]'),
        ('JUNCTIONS', '[JUNCTIONS]'),
        ('pumps', '[PUMPS]'),
    )

    for raw_header, expected in expectations:
        formatted = format_inp_section_header(raw_header)
        assert (formatted == expected)
Beispiel #4
0
def write_inp_section(file_object,
                      allheaders,
                      sectionheader,
                      section_data,
                      pad_top=True,
                      na_fill=''):
    """
    Write one INP section (header line followed by its data) to an open
    file object. Nothing is written when section_data is empty.

    :param file_object: open, writable file object
    :param allheaders: mapping of formatted section headers to their
        details; a section whose 'columns' entry equals ['blob'] is
        written as raw rows without column headers
    :param sectionheader: header of the section being written; it is
        passed through format_inp_section_header before use
    :param section_data: Pandas DataFrame holding the section's data
    :param pad_top: when True, two newlines are written before the header
    :param na_fill: replacement for missing values in non-blob sections
        (blob sections always fill missing values with '')
    """

    sectionheader = format_inp_section_header(sectionheader)

    # an empty dataframe produces no output at all
    if section_data.empty:
        return

    # SWMM-friendly header e.g. [DWF], optionally preceded by blank lines
    leading = '\n\n' if pad_top else ''
    file_object.write(leading + sectionheader + '\n')

    blob_section = False
    if allheaders and (sectionheader in allheaders):
        blob_section = allheaders[sectionheader]['columns'] == ['blob']

    if blob_section:
        # left justify based on the longest string in the blob column
        blob_width = section_data[sectionheader].str.len().max()
        blob_formatter = '{{:<{}s}}'.format(blob_width).format
        body = section_data.fillna('').to_string(
            index_names=False,
            header=False,
            index=False,
            justify='left',
            formatters={sectionheader: blob_formatter})
    else:
        # the column header row is prefixed with ';;'
        file_object.write(';;')

        # left justify every column on the longest string it contains.
        # object columns get one leading space and all other dtypes two;
        # the original author added these to offset the two bytes of the
        # ';;' prefix and keep things lined up
        column_formatters = {}
        for column in section_data.columns:
            width = section_data[column].apply(str).str.len().max()
            if section_data[column].dtype != "O":
                template = '  {{:<{}}}'
            else:
                template = ' {{:<{}}}'
            column_formatters[column] = template.format(width).format

        body = section_data.fillna(na_fill).to_string(
            index_names=False,
            header=True,
            justify='left',
            formatters=column_formatters)

    # write the dataframe as a string
    file_object.write(body + '\n\n')
Beispiel #5
0
def get_inp_sections_details(inp_path, include_brackets=False):
    """
    creates a dictionary with all the headers found in an INP file
    (which varies based on what the user has defined in a given model)
    and updates them based on the definitions in INP_OBJECTS
    this ensures the list is comprehensive

    Lines matching a section defined in INP_OBJECTS are mapped to that
    definition; any other line containing both '[' and ']' is recorded as a
    generic section whose columns are ['blob']. When an INFILTRATION section
    is present, its columns are taken from INFILTRATION_COLS according to the
    INFILTRATION value in the [OPTIONS] section (HORTON when unset).

    :param inp_path: path to the INP file
    :param include_brackets: whether to parse sections including the []
    :return: OrderedDict
    >>> from swmmio.tests.data import MODEL_FULL_FEATURES_XY
    >>> headers = get_inp_sections_details(MODEL_FULL_FEATURES_XY)
    >>> [header for header, cols in headers.items()][:4]
    ['TITLE', 'OPTIONS', 'EVAPORATION', 'RAINGAGES']
    >>> headers['SUBCATCHMENTS']['columns']
    ['Name', 'Raingage', 'Outlet', 'Area', 'PercImperv', 'Width', 'PercSlope', 'CurbLength', 'SnowPack']
    """
    import copy
    from swmmio.defs import INP_OBJECTS
    import pandas as pd
    found_sects = OrderedDict()

    # build the lower-cased search tags once instead of once per file line
    known_tags = [(format_inp_section_header(sect_id).lower(), sect_id, data)
                  for sect_id, data in INP_OBJECTS.items()]

    with open(inp_path) as f:
        for line in f:
            lowered = line.lower()
            for search_tag, sect_id, data in known_tags:
                # find the start of an INP section
                if search_tag in lowered:
                    key = sect_id.upper()
                    if include_brackets:
                        key = '[{}]'.format(key)
                    found_sects[key] = data
                    break
            else:
                # not a known section: record any other bracketed header.
                # both brackets must be present (the previous test
                # `'[' and ']' in line` only checked for the closing one)
                if '[' in line and ']' in line:
                    h = line.strip()
                    if not include_brackets:
                        h = h.replace('[', '').replace(']', '')
                    found_sects[h] = OrderedDict(columns=['blob'])

    # make necessary adjustments to columns that change based on options
    ops_cols = INP_OBJECTS['OPTIONS']['columns']
    ops_string = extract_section_of_file(
        inp_path,
        '[OPTIONS]',
        INP_SECTION_TAGS,
    )
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True
    options = pd.read_csv(StringIO(ops_string),
                          header=None,
                          sep=r'\s+',
                          skiprows=[0],
                          index_col=0,
                          names=ops_cols)

    # the key carries brackets when include_brackets is set, so look it up
    # in the same form it was stored
    inf_id = '[INFILTRATION]' if include_brackets else 'INFILTRATION'
    if inf_id in found_sects:
        # select the correct infiltration column names
        # fall back to HORTON if the infil type is unset
        infil_type = options['Value'].get('INFILTRATION', None)
        if pd.isna(infil_type):
            infil_type = 'HORTON'
        infil_cols = INFILTRATION_COLS[infil_type]

        # overwrite the dynamic section with proper header cols; work on a
        # shallow copy so the shared INP_OBJECTS definition is not mutated
        inf_details = copy.copy(found_sects[inf_id])
        inf_details['columns'] = list(infil_cols)
        found_sects[inf_id] = inf_details
    return found_sects