Example #1
0
def parse_data(data_text):
    """
      read lines with 11 numbers and convert to dataframe

      data_text looks like:

        -----------------------------------------------------------------------------
           PRES   HGHT   TEMP   DWPT   RELH   MIXR   DRCT   SKNT   THTA   THTE   THTV
            hPa     m      C      C      %    g/kg    deg   knot     K      K      K 
        -----------------------------------------------------------------------------
         1000.0    100                                                               
          979.0    288   24.0   23.0     94  18.45      0      0  299.0  353.0  302.2
          974.0    333   25.2   21.1     78  16.46    348      0  300.6  349.1  303.6
          932.0    719   24.0   16.0     61  12.42    243      3  303.2  340.3  305.4
          925.0    785   23.4   15.4     61  12.03    225      3  303.2  339.2  305.4
    """
    all_lines=data_text.strip().split('\n')
    count=0
    theLine=all_lines[count]
    try:
        while theLine.find('PRES   HGHT   TEMP   DWPT') < 0:
            count += 1
            theLine = all_lines[count]
        header_names=all_lines[count].lower().split()
    except IndexError:
        print("no column header line found in sounding")
        sys.exit(1)
    count += 1  #go to unit names
    unit_names=all_lines[count].split()
    count+=2  #skip a row of ------
    data_list=[]
    while True:
        try:
            the_line=all_lines[count]
            dataFields = the_line.split()
            if len(dataFields) == 11:
                try:
                    dataFields = [float(number) for number in dataFields]
                    es = esat(dataFields[3] + 273.15)*0.01  #get vapor pressure from dewpoint in hPa
                    dataFields[5] = (con.eps*es/(dataFields[0] - es))*1.e3   #g/kg
                except VauleError:
                    print('trouble converting dataFields to float')
                    print(dataFields)
                    sys.exit(1)
                data_list.append(dataFields)
                #
                # get the next line
                #
            count += 1
            theLine = all_lines[count]
        except IndexError:
            break
    df_out=pd.DataFrame.from_records(data_list,columns=header_names)
    return df_out,unit_names
Example #2
0
def parse_data(data_text):
    all_lines=data_text.strip().split('\n')
    count=0
    theLine=all_lines[count]
    try:
        while theLine.find('PRES   HGHT   TEMP   DWPT') < 0:
            count += 1
            theLine = all_lines[count]
        header_names=all_lines[count].lower().split()
    except IndexError:
        print("no column header line found in sounding")
        sys.exit(1)
    count += 1  #go to unit names
    unit_names=all_lines[count].split()
    count+=2  #skip a row of ------
    data_list=[]
    while True:
        try:
            the_line=all_lines[count]
            dataFields = the_line.split()
            if len(dataFields) == 11:
                try:
                    dataFields = [float(number) for number in dataFields]
                    es = esat(dataFields[3] + 273.15)*0.01  #get vapor pressure from dewpoint in hPa
                    dataFields[5] = (con.eps*es/(dataFields[0] - es))*1.e3   #g/kg
                except VauleError:
                    print('trouble converting dataFields to float')
                    print(dataFields)
                    sys.exit(1)
                data_list.append(dataFields)
                #
                # get the next line
                #
            count += 1
            theLine = all_lines[count]
        except IndexError:
            break
    df_out=pd.DataFrame.from_records(data_list,columns=header_names)
    return df_out,unit_names
def readsound(inFileName='soundings_july.txt'):
    """ Read San Diego DYCOMS soundings from file soundings.txt.

    Returns a dictionary allsounds[key] which contains all soundings
    in the file. The key is of the form '12Z 18 Jul 2001'.
    allsounds[key] is an array [level, variable], where level is the
    index of the level and variable is
         [,0] PRES(hPa); [,1] HGHT(m); [,2] TEMP[C];
         [,3] DWPT(C); [,4] RELH(%); [,5] MIXR(g/kg);
         [,6] DRCT(deg); [,7] SKNT(knot); [,8] THTA(K);
         [,9] THTE(K); [,10] THTV(K)
    """
    markerText, time, day, month, year = get_segment(inFileName)
    #
    # Open input file and read all lines into thelines.
    with open(inFileName) as f:
        thelines = f.read()
        thelines = thelines.strip()
    #
    # Each sounding starts with a line that contains the string
    # 'NKX San Diego Observations at'. Split the lines into individual
    # soundings.
    marker = re.compile(markerText)
    soundings = marker.split(thelines)
    #
    # Expressions that are used in the following loop.
    lineend = re.compile('\n')
    whitespace = re.compile('\s+')
    #
    # Initialise a dictionary that will hold the soundings. The soundings
    # will be accessible using date/time as key, e.g. '12Z 01 Jul 2001'.
    allsounds = {}

    #
    # Loop over all soundings: Extract the lines that contain the data and
    # convert the data from strings to floats.
    for itemcount, item in enumerate(soundings):
        newsplit = lineend.split(item)
        count = 0
        theLine = newsplit[0]
        #
        # The data lines start with a header. Search for this header.
        #
        try:
            while theLine.find('PRES   HGHT   TEMP   DWPT') < 0:
                count += 1
                theLine = newsplit[count]
        except IndexError:
            if itemcount > 0:  #item[0] is blank
                print("no column header line found in sounding" \
                      " %d\ncontents: %s" % (itemcount, item))
            continue
        count += 1
        theLine = newsplit[count]
        if theLine.find('hPa     m      C      C      %') < 0:
            raise IOError("no units line found in file %s" % theLine)
        count += 1
        theLine = newsplit[count]
        if theLine.find('-------------') < 0:
            raise IOError("expected ---- separator")
        count += 1
        theLine = newsplit[count]
        #
        # lineSave will hold all levels of data (one line in the file
        # corresponds to one level); header will become the key for the
        # sounding in allsounds, e.g. '12Z 01 Jul 2001'.
        lineSave = []
        header = newsplit[0].strip()

        while True:
            try:
                #
                # For each line in the sounding convert the string with the
                # data to an array of floats.
                # Data fields: [0] PRES(hPa); [1] HGHT(m); [2] TEMP[C];
                #              [3] DWPT(C); [4] RELH(%); [5] MIXR(g/kg);
                #              [6] DRCT(deg); [7] SKNT(knot); [8] THTA(K);
                #              [9] THTE(K); [10] THTV(K)
                #
                # at elevation, the bottom lines may be blank:
                #
                #
                #  1000.0     76
                #   925.0    777
                #   850.0   1525
                #   841.0   1620   28.8    0.8     16   4.85     85      9  317.3  333.1  318.2
                #
                dataFields = re.split(whitespace, theLine.strip())
                if len(dataFields) == 11:
                    try:
                        dataFields = [float(number) for number in dataFields]
                    except VauleError:
                        print('trouble converting dataFields to float')
                        print(dataFields)
                        sys.exit(1)
                    #
                    # Recompute the mixing ratio. This is done because due to only
                    # two digits accuracy in sounding.txt, the mixing ratio becomes
                    # 0.00 from time to time.
                    es = esat(
                        dataFields[3] + 273.15
                    ) * 0.01  #get vapor pressure from dewpoint in hPa
                    dataFields[5] = (con.eps * es /
                                     (dataFields[0] - es)) * 1.e3  #g/kg
                    #
                    # Only store complete lines with 11 elements.
                    lineSave.append(dataFields)
                    count += 1
                    theLine = newsplit[count]
                #
                # get the next line
                #
                count += 1
                theLine = newsplit[count]
            except IndexError:
                # out of lines, go to next sounding
                break

        if len(lineSave) < 5:
            #
            # The sounding should have more than 4 levels.
            print("trouble with item %d %d" % (itemcount, len(lineSave)))
        else:
            #
            # Store the sounding to allsounds as a pandas dataframe
            the_date = parsekey(header)
            allsounds[the_date] = pd.DataFrame.from_records(lineSave,
                                                            columns=columns)
        the_keys = list(allsounds.keys())
        the_keys.sort()
        out_dict = OrderedDict()
        for key in the_keys:
            out_dict[key] = allsounds[key]
    return out_dict
Example #4
0
def readsound(inFileName = 'soundings_july.txt'):
    """ Read San Diego DYCOMS soundings from file soundings.txt.

    Returns a dictionary allsounds[key] which contains all soundings
    in the file. The key is of the form '12Z 18 Jul 2001'.
    allsounds[key] is an array [level, variable], where level is the
    index of the level and variable is
         [,0] PRES(hPa); [,1] HGHT(m); [,2] TEMP[C];
         [,3] DWPT(C); [,4] RELH(%); [,5] MIXR(g/kg);
         [,6] DRCT(deg); [,7] SKNT(knot); [,8] THTA(K);
         [,9] THTE(K); [,10] THTV(K)
    """
    markerText,time,day,month,year=get_segment(inFileName)
    #
    # Open input file and read all lines into thelines.
    with open(inFileName) as f:
        thelines = f.read()
        thelines=thelines.strip()
    #
    # Each sounding starts with a line that contains the string
    # 'NKX San Diego Observations at'. Split the lines into individual
    # soundings.
    marker = re.compile(markerText)
    soundings = marker.split(thelines)
    #
    # Expressions that are used in the following loop.
    lineend = re.compile('\n')
    whitespace = re.compile('\s+')
    #
    # Initialise a dictionary that will hold the soundings. The soundings
    # will be accessible using date/time as key, e.g. '12Z 01 Jul 2001'.
    allsounds = {}

    #
    # Loop over all soundings: Extract the lines that contain the data and
    # convert the data from strings to floats.
    for itemcount,item in enumerate(soundings):
        newsplit = lineend.split(item)
        count = 0
        theLine = newsplit[0]
        #
        # The data lines start with a header. Search for this header.
        #
        try:
            while theLine.find('PRES   HGHT   TEMP   DWPT') < 0:
                count += 1
                theLine = newsplit[count]
        except IndexError:
            if itemcount > 0:  #item[0] is blank
                print("no column header line found in sounding" \
                      " %d\ncontents: %s" % (itemcount, item))
            continue
        count += 1
        theLine = newsplit[count]
        if theLine.find('hPa     m      C      C      %') < 0:
            raise IOError("no units line found in file %s" % theLine)
        count += 1
        theLine = newsplit[count]
        if theLine.find('-------------') < 0:
            raise IOError("expected ---- separator")
        count += 1
        theLine = newsplit[count]
        #
        # lineSave will hold all levels of data (one line in the file
        # corresponds to one level); header will become the key for the
        # sounding in allsounds, e.g. '12Z 01 Jul 2001'.
        lineSave = []
        header = newsplit[0].strip()

        while True:
            try:
                #
                # For each line in the sounding convert the string with the
                # data to an array of floats.
                # Data fields: [0] PRES(hPa); [1] HGHT(m); [2] TEMP[C];
                #              [3] DWPT(C); [4] RELH(%); [5] MIXR(g/kg);
                #              [6] DRCT(deg); [7] SKNT(knot); [8] THTA(K);
                #              [9] THTE(K); [10] THTV(K)
                #
                # at elevation, the bottom lines may be blank:
                #
                #
                #  1000.0     76                                                               
                #   925.0    777                                                               
                #   850.0   1525                                                               
                #   841.0   1620   28.8    0.8     16   4.85     85      9  317.3  333.1  318.2
                #
                dataFields = re.split(whitespace, theLine.strip())
                if len(dataFields) == 11:
                    try:
                        dataFields = [float(number) for number in dataFields]
                    except VauleError:
                        print('trouble converting dataFields to float')
                        print(dataFields)
                        sys.exit(1)
                    #
                    # Recompute the mixing ratio. This is done because due to only
                    # two digits accuracy in sounding.txt, the mixing ratio becomes
                    # 0.00 from time to time.
                    es = esat(dataFields[3] + 273.15)*0.01  #get vapor pressure from dewpoint in hPa
                    dataFields[5] = (con.eps*es/(dataFields[0] - es))*1.e3   #g/kg
                    #
                    # Only store complete lines with 11 elements.
                    lineSave.append(dataFields)
                    count += 1
                    theLine = newsplit[count]
                #
                # get the next line
                #
                count += 1
                theLine = newsplit[count]
            except IndexError:
                # out of lines, go to next sounding
                break
            
        if len(lineSave) < 5:
            #
            # The sounding should have more than 4 levels.
            print("trouble with item %d %d" % (itemcount,len(lineSave)))
        else:
            #
            # Store the sounding to allsounds as a pandas dataframe
            the_date = parsekey(header)
            allsounds[the_date] = pd.DataFrame.from_records(lineSave,columns=columns)
        the_keys=list(allsounds.keys())
        the_keys.sort()
        out_dict=OrderedDict()
        for key in the_keys:
            out_dict[key]=allsounds[key]
    return out_dict