Example #1
 def onClick_BtnAddSeries(self, event, strLabel):
     """
     """
     dlg = wx.TextEntryDialog(None, "Name of Data Series you are adding to the database:", "New Data Series", " ")
     answer = dlg.ShowModal()
     if answer == wx.ID_OK:
         stNewSeries = dlg.GetValue()
         stNewSeries = " ".join(stNewSeries.split())
         recID = scidb.assureItemIsInTableField(stNewSeries, "DataSeries", "DataSeriesDescription")
         self.fillSeriesList()
     else:
         stNewSeries = ''
     dlg.Destroy()
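
The " ".join(stNewSeries.split()) idiom normalizes the user's entry before it is stored: split() with no arguments splits on any run of whitespace and drops leading/trailing whitespace, so rejoining with single spaces canonicalizes the name. A quick illustration:

    raw = "  Soil   temperature \t 10cm  "
    clean = " ".join(raw.split())  # collapses every whitespace run to one space
    print(clean)  # -> "Soil temperature 10cm"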
Example #2
 def onClick_BtnAddStation(self, event, strLabel):
     """
     """
     dlg = wx.TextEntryDialog(None, "Name of Station you are adding to the database:", "New Station", " ")
     answer = dlg.ShowModal()
     if answer == wx.ID_OK:
         stNewStation = dlg.GetValue()
         stNewStation = " ".join(stNewStation.split())
         recID = scidb.assureItemIsInTableField(stNewStation, "Stations", "StationName")
         self.fillStationsList()
     else:
         stNewStation = ''
     dlg.Destroy()
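
Both handlers funnel into scidb.assureItemIsInTableField, which is not shown on this page. Judging from its call sites, it is a get-or-create helper: it returns the ID of the row in the given table whose field matches the value, inserting the row first if necessary. A minimal sketch of that contract against sqlite3 follows; the function name, schema, and sample value here are assumptions, not the real scidb module:

    import sqlite3

    def assure_item_is_in_table_field(cur, value, table, field):
        # Hypothetical stand-in for scidb.assureItemIsInTableField.
        # Table/field names are interpolated, so they must be trusted strings.
        cur.execute("SELECT ID FROM %s WHERE %s = ?;" % (table, field), (value,))
        row = cur.fetchone()
        if row is not None:
            return row[0]  # already present; return its ID
        cur.execute("INSERT INTO %s (%s) VALUES (?);" % (table, field), (value,))
        return cur.lastrowid  # ID of the row just created

    conn = sqlite3.connect(":memory:")
    cur = conn.cursor()
    cur.execute("CREATE TABLE Stations (ID INTEGER PRIMARY KEY, StationName TEXT UNIQUE);")
    print(assure_item_is_in_table_field(cur, "Ridge Top", "Stations", "StationName"))  # -> 1
    print(assure_item_is_in_table_field(cur, "Ridge Top", "Stations", "StationName"))  # -> 1 again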
Example #3
    def parseGLTextFile(self, infoDict):
        """
        parse a GreenLogger text file
        """
        dataRecItemCt = 0
        dataRecsAdded = 0
        dataRecsDupSkipped = 0

        #        print "About to put lines into temp table"
        self.putTextLinesIntoTmpTable(infoDict)
        #        print "Finished putting lines into temp table"
        # get the metadata header
        stSQL = """SELECT Line FROM tmpLines WHERE Line LIKE '{"Instrument identifier":%' GROUP BY Line;"""
        mtDat = scidb.curT.execute(stSQL).fetchone()
        dictHd = ast.literal_eval(mtDat['Line'])
        # somewhat redundant to do 'LoggerSerialNumber' here (fn 'assureChannelIsInDB' would fill in
        # later), but allows putting Model into the DB
        iInstSpecID = scidb.assureItemIsInTableField(dictHd['Model'],
                                                     "InstrumentSpecs",
                                                     "InstrumentSpec")
        iLoggerID = scidb.assureItemIsInTableField(
            dictHd['Instrument identifier'], "Loggers", "LoggerSerialNumber")
        scidb.curD.execute(
            "UPDATE Loggers SET InstrumentSpecID = ? WHERE ID = ?;",
            (iInstSpecID, iLoggerID))

        # get the hour offset(s); most files will have only one
        stSQL = "SELECT substr(Line, 21, 3) as TZ FROM tmpLines " \
                "WHERE Line LIKE '____-__-__ __:__:__ ___%' GROUP BY TZ;"
        hrOffsets = scidb.curT.execute(stSQL).fetchall()
        for hrOffset in hrOffsets:
            iTimeZoneOffset = int(hrOffset['TZ'])
            # make a dictionary of channel IDs for data lines that have this hour offset
            # would be slightly different for different hour offsets
            lCols = dictHd['Columns']  # column metadata: a list of dictionaries
            #            # it is indexed by the columns list, and is zero-based
            #            lCh = [0 for i in range(len(lCols))] # initially, fill with all zeroes
            dictChannels = {}  # this is the dictionary we will build
            for dictCol in lCols:
                # somewhat redundant to fill these in before calling function 'assureChannelIsInDB', but
                #  allows assigning sensor device types
                iDeviceTypeID = scidb.assureItemIsInTableField(
                    dictCol['Device'], "DeviceSpecs", "DeviceSpec")
                iSensorID = scidb.assureItemIsInTableField(
                    dictCol['Identifier'], "Sensors", "SensorSerialNumber")
                scidb.curD.execute(
                    "UPDATE Sensors SET DeviceSpecID = ? WHERE ID = ?;",
                    (iDeviceTypeID, iSensorID))
                #                iDataTypeID = scidb.assureItemIsInTableField(dictCol['DataType'], "DataTypes", "TypeText")
                #                iDataUnitsID = scidb.assureItemIsInTableField(dictCol['DataUnits'], "DataUnits", "UnitsText")
                # build list to create the channel
                # list items are: ChannelID, originalCol, Logger, Sensor, dataType, dataUnits, hrOffset, new
                lChannel = [
                    0, dictCol['Order'], dictHd['Instrument identifier'],
                    dictCol['Identifier'], dictCol['DataType'],
                    dictCol['DataUnits'], iTimeZoneOffset, ''
                ]
                dictChannels[dictCol['Order']] = lChannel[:]  # the key is the column number
                scidb.assureChannelIsInDB(dictChannels[dictCol['Order']])  # get or create the channel
                iChannelID = dictChannels[dictCol['Order']][0]
                # store the column name as a Series
                iSeriesID = scidb.assureItemIsInTableField(
                    dictCol['Name'], "DataSeries", "DataSeriesDescription")
                # tie it to this Channel, to offer later
                stSQLcs = 'INSERT INTO tmpChanSegSeries(ChannelID, SeriesID) VALUES (?, ?);'
                try:
                    scidb.curD.execute(stSQLcs, (iChannelID, iSeriesID))
                except sqlite3.IntegrityError:
                    pass  # silently ignore duplicates

#            print 'Before Channel function'
#            for ky in dictChannels.keys():
#                print ky, dictChannels[ky][:]
#            for ky in dictChannels.keys():
#                scidb.assureChannelIsInDB(dictChannels[ky])
#            print 'After Channel function'
#            for ky in dictChannels.keys():
#                print ky, dictChannels[ky][:]

            # make a list of channel IDs for the set of lines with this HrOffset, for quick lookup
            # it is indexed by the columns list, and is zero-based
            lCh = []
            for iCol in range(len(lCols)):
                iNomCol = iCol + 1
                if iNomCol in dictChannels:
                    lChanSet = dictChannels[iNomCol][:]
                    lCh.append(lChanSet[0])
                else:  # does not correspond to a data column
                    lCh.append(0)  # placeholder, to make list indexes work right

            # done setting up channels, get data lines
            stSQL = "SELECT ID, Line FROM tmpLines WHERE substr(Line, 21, 3) = ? " \
                "AND Line LIKE '____-__-__ __:__:__ ___%' ORDER BY ID;"
            recs = scidb.curT.execute(stSQL, (hrOffset['TZ'], )).fetchall()
            for rec in recs:
                lData = rec['Line'].split('\t')
                # item zero is the timestamp followed by the timezone offset
                sTimeStamp = lData[0][:-4]  # drop timezone offset, we already have it
                tsAsTime = datetime.datetime.strptime(sTimeStamp,
                                                      "%Y-%m-%d %H:%M:%S")
                # replace() returns a new object; reassign, or the tzinfo-stripping
                # silently does nothing
                tsAsTime = tsAsTime.replace(tzinfo=None)  # make sure it carries no local timezone info
                tsAsTime = tsAsTime + datetime.timedelta(hours=-iTimeZoneOffset)
                tsAsDate = tsAsTime.date()
                stSQL = "INSERT INTO Data (UTTimestamp, ChannelID, Value) VALUES (?, ?, ?)"
                for iCol in range(len(lData)):
                    if iCol > 0:  # an item of data
                        # give some progress diagnostics
                        dataRecItemCt += 1
                        if dataRecItemCt % 100 == 0:
                            self.msgArea.ChangeValue("Line " + str(rec['ID']) +
                                                     " of " +
                                                     str(infoDict['lineCt']) +
                                                     "; " +
                                                     str(dataRecsAdded) +
                                                     " records added, " +
                                                     str(dataRecsDupSkipped) +
                                                     " duplicates skipped.")
                            wx.Yield()
                        try:  # much faster to try and fail than to test first
                            scidb.curD.execute(
                                stSQL, (tsAsTime, lCh[iCol], lData[iCol]))
                            dataRecsAdded += 1  # count it
                        except sqlite3.IntegrityError:  # item is already in Data table
                            dataRecsDupSkipped += 1  # count but otherwise ignore
                        finally:
                            wx.Yield()

        # finished parsing lines
        infoDict['numNewDataRecsAdded'] = dataRecsAdded
        infoDict['numDupDataRecsSkipped'] = dataRecsDupSkipped
        infoDict['stParseMsg'] = str(infoDict['lineCt']) + " lines processed; " + \
            str(dataRecsAdded) + " data records added to database, " + \
            str(dataRecsDupSkipped) + " duplicates skipped."
        self.msgArea.ChangeValue(infoDict['stParseMsg'])
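
The data-line loop above relies on a try-and-fail idiom: rather than querying first whether a (timestamp, channel) pair already exists, it attempts the INSERT and lets a uniqueness constraint raise sqlite3.IntegrityError for duplicates, halving the statements per record. A self-contained sketch of the pattern; the UNIQUE constraint and sample rows are assumptions about the Data table, not taken from the source:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    cur = conn.cursor()
    # the UNIQUE constraint is what turns a duplicate into an IntegrityError
    cur.execute("CREATE TABLE Data (UTTimestamp TEXT, ChannelID INTEGER, Value TEXT, "
                "UNIQUE (UTTimestamp, ChannelID))")

    added = skipped = 0
    rows = [("2013-07-04 12:00:00", 1, "21.5"),
            ("2013-07-04 12:00:00", 1, "21.5"),  # exact duplicate of the first
            ("2013-07-04 12:10:00", 1, "21.7")]
    for row in rows:
        try:  # much faster to try and fail than to test first
            cur.execute("INSERT INTO Data (UTTimestamp, ChannelID, Value) VALUES (?, ?, ?)", row)
            added += 1
        except sqlite3.IntegrityError:
            skipped += 1  # count but otherwise ignore
    print(str(added) + " added, " + str(skipped) + " skipped")  # -> 2 added, 1 skipped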
Example #4
    def parseBlueTermFile(self, infoDict):
        """
        a BlueTerm log file can contain captured data from a series of daily
        GL text files, possibly even from different loggers
        """
        dataRecItemCt = 0
        dataRecsAdded = 0
        dataRecsDupSkipped = 0

        self.putTextLinesIntoTmpTable(infoDict)

        # find the different data dump segments
        # within each one, data will be from only one logger, and so we
        #  can use the same metadata header
        # make a list of 2-tuples; tuple item 0 = the starting ID of the
        # dump segment and tuple item 1 = the ending ID
        lDmpSegs = []
        stSQL = """SELECT ID, Line FROM tmpLines
            WHERE Line = '{"datadump":"begin"}' OR Line = '{"datadump":"end"}'
            ORDER BY ID;"""
        dmSegs = scidb.curT.execute(stSQL).fetchall()
        # begin or end may be missing, due to log interruption
        idSegStart = None
        idSegEnd = None
        # only use segments that have valid begin/end
        for dmSegItm in dmSegs:
            if dmSegItm['Line'] == '{"datadump":"begin"}':
                idSegStart = dmSegItm['ID']
            elif dmSegItm['Line'] == '{"datadump":"end"}':
                if idSegStart is not None:
                    # make the tuple
                    tSeg = (idSegStart, dmSegItm['ID'])
                    # append to list
                    lDmpSegs.append(tSeg)
                    # null the vars
                    idSegStart = None
                    idSegEnd = None
            else:  # this should not happen, but tie up if it does
                idSegStart = None
                idSegEnd = None

        print "dump segments:", lDmpSegs

        # being worked on >>>>
        iNumSegs = len(lDmpSegs)
        if iNumSegs == 0:
            infoDict['stParseMsg'] = " No valid data segments in file."
            # will skip following loop
        iCtSeg = 0
        for tSeg in lDmpSegs:
            iCtSeg += 1
            idSegStart, idSegEnd = tSeg
            # get the metadata header; if multiple will all be the same for one dump segment
            stSQL = """SELECT Line 
                    FROM tmpLines 
                    WHERE ID > ? AND ID < ? 
                    AND Line LIKE '{"Instrument identifier":%'
                    GROUP BY Line;"""
            mtDat = scidb.curT.execute(stSQL, tSeg).fetchone()
            dictHd = ast.literal_eval(mtDat['Line'])
            # somewhat redundant to do 'LoggerSerialNumber' here (fn 'assureChannelIsInDB' would fill in
            # later), but allows putting Model into the DB
            iInstSpecID = scidb.assureItemIsInTableField(
                dictHd['Model'], "InstrumentSpecs", "InstrumentSpec")
            iLoggerID = scidb.assureItemIsInTableField(
                dictHd['Instrument identifier'], "Loggers",
                "LoggerSerialNumber")
            scidb.curD.execute(
                "UPDATE Loggers SET InstrumentSpecID = ? WHERE ID = ?;",
                (iInstSpecID, iLoggerID))

            # get the hour offset(s); most files will have only one
            stSQL = "SELECT substr(Line, 21, 3) as TZ FROM tmpLines " \
                    "WHERE ID > ? AND ID < ? " \
                    "AND Line LIKE '____-__-__ __:__:__ ___%' GROUP BY TZ;"
            hrOffsets = scidb.curT.execute(stSQL, tSeg).fetchall()
            for hrOffset in hrOffsets:
                iTimeZoneOffset = int(hrOffset['TZ'])
                # make a dictionary of channel IDs for data lines that have this hour offset
                # would be slightly different for different hour offsets
                lCols = dictHd['Columns']  # column metadata: a list of dictionaries
                #            # it is indexed by the columns list, and is zero-based
                #            lCh = [0 for i in range(len(lCols))] # initially, fill with all zeroes
                dictChannels = {}  # this is the dictionary we will build
                for dictCol in lCols:
                    # somewhat redundant to fill these in before calling function 'assureChannelIsInDB', but
                    #  allows assigning sensor device types
                    iDeviceTypeID = scidb.assureItemIsInTableField(
                        dictCol['Device'], "DeviceSpecs", "DeviceSpec")
                    iSensorID = scidb.assureItemIsInTableField(
                        dictCol['Identifier'], "Sensors", "SensorSerialNumber")
                    scidb.curD.execute(
                        "UPDATE Sensors SET DeviceSpecID = ? WHERE ID = ?;",
                        (iDeviceTypeID, iSensorID))
                    #                iDataTypeID = scidb.assureItemIsInTableField(dictCol['DataType'], "DataTypes", "TypeText")
                    #                iDataUnitsID = scidb.assureItemIsInTableField(dictCol['DataUnits'], "DataUnits", "UnitsText")
                    # build list to create the channel
                    # list items are: ChannelID, originalCol, Logger, Sensor, dataType, dataUnits, hrOffset, new
                    lChannel = [
                        0, dictCol['Order'], dictHd['Instrument identifier'],
                        dictCol['Identifier'], dictCol['DataType'],
                        dictCol['DataUnits'], iTimeZoneOffset, ''
                    ]
                    dictChannels[dictCol['Order']] = lChannel[:]  # the key is the column number
                    scidb.assureChannelIsInDB(dictChannels[dictCol['Order']])  # get or create the channel
                    iChannelID = dictChannels[dictCol['Order']][0]
                    # store the column name as a Series
                    iSeriesID = scidb.assureItemIsInTableField(
                        dictCol['Name'], "DataSeries", "DataSeriesDescription")
                    # tie it to this Channel, to offer later
                    stSQLcs = 'INSERT INTO tmpChanSegSeries(ChannelID, SeriesID) VALUES (?, ?);'
                    try:
                        scidb.curD.execute(stSQLcs, (iChannelID, iSeriesID))
                    except sqlite3.IntegrityError:
                        pass  # silently ignore duplicates

    #            print 'Before Channel function'
    #            for ky in dictChannels.keys():
    #                print ky, dictChannels[ky][:]
    #            for ky in dictChannels.keys():
    #                scidb.assureChannelIsInDB(dictChannels[ky])
    #            print 'After Channel function'
    #            for ky in dictChannels.keys():
    #                print ky, dictChannels[ky][:]

                # make a list of channel IDs for the set of lines with this HrOffset, for quick lookup
                # it is indexed by the columns list, and is zero-based
                lCh = []
                for iCol in range(len(lCols)):
                    iNomCol = iCol + 1
                    if iNomCol in dictChannels:
                        lChanSet = dictChannels[iNomCol][:]
                        lCh.append(lChanSet[0])
                    else:  # does not correspond to a data column
                        lCh.append(0)  # placeholder, to make list indexes work right

                # done setting up channels, get data lines
                stSQL = "SELECT ID, Line FROM tmpLines " \
                    "WHERE ID > ? AND ID < ? " \
                    "AND substr(Line, 21, 3) = ? " \
                    "AND Line LIKE '____-__-__ __:__:__ ___%' ORDER BY ID;"
                recs = scidb.curT.execute(
                    stSQL, (idSegStart, idSegEnd, hrOffset['TZ'])).fetchall()
                iNumSegLines = len(recs)
                iCtSegLines = 0
                for rec in recs:
                    iCtSegLines += 1
                    lData = rec['Line'].split('\t')
                    # item zero is the timestamp followed by the timezone offset
                    sTimeStamp = lData[0][:-4]  # drop timezone offset, we already have it
                    tsAsTime = datetime.datetime.strptime(
                        sTimeStamp, "%Y-%m-%d %H:%M:%S")
                    # replace() returns a new object; reassign, or the tzinfo-stripping
                    # silently does nothing
                    tsAsTime = tsAsTime.replace(tzinfo=None)  # make sure it carries no local timezone info
                    tsAsTime = tsAsTime + datetime.timedelta(hours=-iTimeZoneOffset)
                    tsAsDate = tsAsTime.date()
                    stSQL = "INSERT INTO Data (UTTimestamp, ChannelID, Value) VALUES (?, ?, ?)"
                    for iCol in range(len(lData)):
                        if iCol > 0:  # an item of data
                            # give some progress diagnostics
                            dataRecItemCt += 1
                            if dataRecItemCt % 100 == 0:
                                self.msgArea.ChangeValue("Segment " + str(iCtSeg) +
                                    " of " + str(iNumSegs) +
                                    ", HrOffset " + str(iTimeZoneOffset) +
                                    ", Line " + str(iCtSegLines) +
                                    " of " + str(iNumSegLines) + "; " +
                                    str(dataRecsAdded) + " records added, " +
                                    str(dataRecsDupSkipped) + " duplicates skipped.")
                                wx.Yield()
                            try:  # much faster to try and fail than to test first
                                scidb.curD.execute(
                                    stSQL, (tsAsTime, lCh[iCol], lData[iCol]))
                                dataRecsAdded += 1  # count it
                            except sqlite3.IntegrityError:  # item is already in Data table
                                dataRecsDupSkipped += 1  # count but otherwise ignore
                            finally:
                                wx.Yield()
        # <<<<< being worked on
        # finished parsing lines
        infoDict['numNewDataRecsAdded'] = dataRecsAdded
        infoDict['numDupDataRecsSkipped'] = dataRecsDupSkipped
        infoDict['stParseMsg'] = str(infoDict['lineCt']) + " lines processed; " + \
            str(dataRecsAdded) + " data records added to database, " + \
            str(dataRecsDupSkipped) + " duplicates skipped."
        self.msgArea.ChangeValue(infoDict['stParseMsg'])
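
The begin/end pairing at the top of parseBlueTermFile is worth isolating, since log interruptions make it easy to get wrong. A hypothetical standalone version of the same scan, testable without a database; the function name and sample IDs are made up for illustration:

    def pair_dump_segments(marker_rows):
        # marker_rows: (ID, Line) tuples for the begin/end marker lines, in ID order.
        # Returns (startID, endID) pairs; as in the code above, a begin with no
        # matching end is dropped, and a second begin supersedes the first.
        segments = []
        seg_start = None
        for rec_id, line in marker_rows:
            if line == '{"datadump":"begin"}':
                seg_start = rec_id
            elif line == '{"datadump":"end"}' and seg_start is not None:
                segments.append((seg_start, rec_id))
                seg_start = None
        return segments

    # demo: the second dump was cut off before its end marker was captured
    markers = [(10, '{"datadump":"begin"}'), (55, '{"datadump":"end"}'),
               (60, '{"datadump":"begin"}'),
               (90, '{"datadump":"begin"}'), (130, '{"datadump":"end"}')]
    print(pair_dump_segments(markers))  # -> [(10, 55), (90, 130)]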