Python DBImport 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: DBImport

클래스/타입: DBImport

hotexamples.com에서의 예제들: 5

Python DBImport - 5개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 DBImport.DBImport에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

DBImport(2)

correctWindSpeed(1)

예제 #1

파일 보기

    def __init__(self, forecast_time, dbname, path=".", history=False):

        # cleo forecast files get written to our current directory,
        # but we might want to override that for testing
        self.path = path

        self.dbname = dbname
        self.dbimport = DBImport()
        self.forecast_time = forecast_time
        self.history = history

        # take note of when this import is happening
        self.import_time = datetime.utcnow().replace(second=0, microsecond=0)
        # for reporting and diagnostics
        self.files = {}
        self.report = []
        self.quiet = False

        self.initCleoCommandLines()

        # This is a mapping of column names in the wind file header, to the
        # name of where we store it in our internal data dictionary
        # This makes adding new quantities easy.
        self.windFileCols = [("smphTimeList_avrg", "speed_mph"),
                             ("LWDTimeList_avrg", "irradiance")]

예제 #2

파일 보기

파일: CleoDBImport.py 프로젝트: fusiongyro/antioch

    def __init__(self, forecast_time, dbname, path = ".", history = False):
        
        # cleo forecast files get written to our current directory,
        # but we might want to override that for testing
        self.path     = path

        self.dbname        = dbname
        self.dbimport      = DBImport()
        self.forecast_time = forecast_time
        self.history       = history

        # take note of when this import is happening
        self.import_time = datetime.utcnow().replace(second = 0
                                                   , microsecond = 0)
        # for reporting and diagnostics
        self.files = {}
        self.report = []
        self.quiet = False

        self.initCleoCommandLines()

        # This is a mapping of column names in the wind file header, to the
        # name of where we store it in our internal data dictionary
        # This makes adding new quantities easy.
        self.windFileCols = [("smphTimeList_avrg", "speed_mph")
                           , ("LWDTimeList_avrg", "irradiance")
                            ]

예제 #3

파일 보기

        q = """INSERT
                   INTO forecast_by_frequency (frequency, opacity, tsys, forecast_id)
                   VALUES(%s, %s, %s, %s)""" % (frequency, opacity, tsys, id)
        c.query(q)


def processFile(filename, forecast, date):
    for line in open(filename):
        xs = line.split()
        fill(xs, forecast, date)
        date += timedelta(hours=1)


# ================================ program ================================

dbimport = DBImport()

PREFIX = 'simulateTime_2006_'
SUFFIXES = ['0-11', '12-23', '24-35', '36-47', '48-59']
STARTDATE = datetime(2006, 1, 1)

if len(sys.argv) != 3:
    print "populateForecastTables.py <database name> <read_directory>"
    exit()

database = sys.argv[1]
read_dir = sys.argv[2]

c = pg.connect(user="******", dbname=database, port=settings.DATABASE_PORT)

for suffix in SUFFIXES:

예제 #4

파일 보기

파일: CleoDBImport.py 프로젝트: fusiongyro/antioch

class CleoDBImport:

    def __init__(self, forecast_time, dbname, path = ".", history = False):
        
        # cleo forecast files get written to our current directory,
        # but we might want to override that for testing
        self.path     = path

        self.dbname        = dbname
        self.dbimport      = DBImport()
        self.forecast_time = forecast_time
        self.history       = history

        # take note of when this import is happening
        self.import_time = datetime.utcnow().replace(second = 0
                                                   , microsecond = 0)
        # for reporting and diagnostics
        self.files = {}
        self.report = []
        self.quiet = False

        self.initCleoCommandLines()

        # This is a mapping of column names in the wind file header, to the
        # name of where we store it in our internal data dictionary
        # This makes adding new quantities easy.
        self.windFileCols = [("smphTimeList_avrg", "speed_mph")
                           , ("LWDTimeList_avrg", "irradiance")
                            ]

    def initCleoCommandLines(self):
        """
        The system calls we make to cleo are rather complicated, and will
        change as what we need changes.
        """

        # here we init settings for the kind of info we're getting from the CLEO
        self.cleoCmdLine = "/home/dss/bin/forecastsCmdLine"

        freqs = range(2, 53)
        freqs.extend(range(54, 122, 2))          
        self.atmoFreqs = freqs
        self.numAtmoFreqs = len(freqs)
        freqStr = " ".join(["%d" % f for f in freqs])

        measurements = ["OpacityTime", "TsysTime", "TatmTime"]
        measurementsStr = " ".join(measurements)
        sites = ["Elkins", "HotSprings", "Lewisburg"]
        sitesStr = " ".join(sites)

        if self.history:
            historyOption = '-startTime "%s" -incrTime 1 -mimicHistorical' % \
                datetime.strftime(self.forecast_time, "%m/%d/%Y %H:%M:%S")
        else:
            historyOption = ''

        self.atmoCmdLine = "%s -sites %s -average -calculate %s -freqList %s -elevTsys 90 %s" % \
            (self.cleoCmdLine, sitesStr, measurementsStr, freqStr, historyOption)

        # Then the winds, etc.
        measurements = ["GroundTime", "CloudsPrecipTime"]
        measurementsStr = " ".join(measurements)

        self.windCmdLine = "%s -sites %s -average -calculate %s %s" % \
            (self.cleoCmdLine, sitesStr, measurementsStr, historyOption)

    def reportLine(self, line):
        if not self.quiet:
            print line
        self.report.append(line)

    def mph2mps(self, speed):
        return speed / 2.237
    
    def getForecastTypeId(self, delta):
        """
        Translates a time difference in hours into a database-ready,
        backward-compatible integer identifier, e.g., 1-8 or 9-24 or ...
        """
        retval = int(delta) / FORECASTDELTA + SIXDELTASTART
        # Note: perhaps retval < SIXDELTASTART always implies SIXDELTASTART?
        return retval if SIXDELTASTART <= retval < MAXFORECASTTYPE else None

    def getForecastTypeIdFromTimestamp(self, timestamp):
        """
        Translates a datetime using the forecast time into a database-ready,
        backward-compatible integer identifier, e.g., 1-8 or 9-24 or ...
        """
        if timestamp >= self.forecast_time:
            td = timestamp - self.forecast_time
            return self.getForecastTypeId(td.days * 24. + td.seconds / 3600.)
        else:
            return SIXDELTASTART
        
    def getWeather(self):
        "Make actual calls to cleo to populate weather data files."

        print self.atmoCmdLine
        system(self.atmoCmdLine)

        print self.windCmdLine
        system(self.windCmdLine)

        # where are the files?  See the cleo help:
        # "The results of this program are a set of files that are 
        # placed in a newly-created subdirectory of the current dir."

    def read(self, forecast_file, wind_file):
        """
        Given the existence of cleo data files, parse the files into a
        data dictionary.
        """

        # init the data dictionary we will be writting to
        self.data = {}
        
        # we use a similar method for parsing each file, but there are enough
        # exceptions to warrant separate functions
        self.readWindFile(wind_file)
        self.readAtmoFile(forecast_file)

        # Note: we are changing our dictionary to an ordered list
        # why not just create the new ordered list?
        # i.e., self.data[time index][0] = timestamp
        # or self.data[time index][1][keyword][maybe freq] = value
        # This works, and performance isn't an issue.
        self.data = [(timestamp, values) \
            for timestamp, values in self.data.items()]
        self.data.sort(key = lambda x: x[0])

    def readWindFile(self, file):
        """
        Parsing this file is straight forward: we'll need the timestamp
        and *scalar* quantities from each row.
        """

        # read cleo forecast (ground)
        print 'Process cleo forecast data (ground) ...', file
        f          = open(file, 'r')
        lines      = f.readlines()
        header     = lines[0]
        assert header.strip() == windFileHeader.strip()  

        for line in lines[1:]:
            row = line.split(' ')
            timestamp = TimeAgent.hour(TimeAgent.mjd2dt(float(row[0]))) #mjd
            self.data[timestamp] = {}

            # what forecast type will this be?
            self.data[timestamp]['forecast_type_id'] = \
                self.getForecastTypeIdFromTimestamp(timestamp)

            self.readWindFileValues(header, timestamp, row)

            # Note: we'll stop doing this eventually, but for now:
            # need to insert a corrected wind speed into the DB.
            speed_mph = self.data[timestamp]['speed_mph']
            self.data[timestamp]['speed_ms'] = \
                self.dbimport.correctWindSpeed(timestamp
                                             , self.mph2mps(speed_mph))

        f.close()

    def readWindFileValues(self, header, timestamp, row):
        "Uses the mapping of file headers to data dict to grab values"

        #Ex: self.windFileCols = [("smphTimeList_avrg", "speed_mph")]

        for colName, dataName in self.windFileCols:
            col = header.split(' ').index(colName)
            self.data[timestamp][dataName] = float(row[col])
            
    def readAtmoFile(self, file):
        """
        Parsing this file is more complicated, because each row contains
        *vector* quantities.
        """

        # read cleo forecast (atmosphere)
        print 'Process cleo forecast data (atmosphere) ... ', file
        f     = open(file, 'r')
        lines = f.readlines()
        header = lines[0]
        assert header.strip() == freqFileHeader.strip()
        first = lines[0].split(' ')
        lines = lines[1:]
        for line in lines:
            row = line.split(' ')
            timestamp  = TimeAgent.hour(TimeAgent.mjd2dt(float(row[0]))) #mjdt1
            if not self.data.has_key(timestamp):
                self.reportLine("ERROR: No wind data for %s\n" % timestamp)
                continue
            # frequencies
            self.data[timestamp]['freqs'] = []
            # OpacityTime<freq>List_avrg
            self.data[timestamp]['tauCleo']  = []
            # TsysTime<freq>List_avrg
            self.data[timestamp]['tSysCleo'] = []
            # TatmTime<freq>List_avrg
            self.data[timestamp]['tAtmCleo'] = []
            num = self.numAtmoFreqs
            for i in range(num):
                #print first[ifreq+1], first[ifreq+51], first[ifreq+101]
                self.data[timestamp]['freqs'].append(self.atmoFreqs[i])
                self.data[timestamp]['tauCleo'].append(float(row[i+1]))
                self.data[timestamp]['tSysCleo'].append(float(row[i+num+1]))
                self.data[timestamp]['tAtmCleo'].append(float(row[i+(num*2)+1]))
        f.close()

    def addTimeToDB(self, timestamp, table):
        """
        Searches for given timestamp value in the given 
        table.  If it doesn't exist, create it.
        In any case returns it's ID.
        """

        select_query = "SELECT id FROM %s WHERE date = '%s'" % \
            (table, timestamp)
        insert_query = "INSERT INTO %s (date) VALUES ('%s')" % \
            (table, timestamp)

        # timestamp already exists?
        r = self.c.query(select_query)
        if len(r.dictresult()) == 0:
            # it doesn't, insert it
            self.c.query(insert_query)
            # now get the id of what we just created
            r = self.c.query(select_query) 

        return r.dictresult()[0]["id"]
        
    def addForecastTime(self, timestamp):
        return self.addTimeToDB(timestamp, "forecast_times")

    def addImportTime(self, timestamp):
        return self.addTimeToDB(timestamp, "import_times")

    def addWeatherDate(self, timestamp):
        """
        Searches for given timestamp value in the weather_dates
        table.  If it doesn't exist, create it.
        In any case returns it's ID.
        """

        # look to see if this timestamp already exists
        r = self.c.query("SELECT id FROM weather_dates WHERE date = '%s'" % timestamp)

        if len(r.dictresult()) == 0:
            # it doesn't, insert it
            self.c.query("INSERT INTO weather_dates (date) VALUES ('%s')" % timestamp)
            # now get the id of what we just created
            r = \
                self.c.query("SELECT id FROM weather_dates WHERE date = '%s'" % timestamp)

        return r.dictresult()[0]["id"]

    def addForecast(self
                  , forecast_type_id
                  , weather_date_id
                  , forecast_time_id
                  , import_time_id
                  , value):
        """
        Intelligent insert into the forcast table from the data dictionary
        by checking forecast type and timestamp.
        """
        speed_mph  = value['speed_mph']
        speed_ms   = value['speed_ms']
        irradiance = value['irradiance']
        r = self.c.query("""SELECT wind_speed
                            FROM forecasts
                            WHERE forecast_type_id = %s and weather_date_id = %s
                         """ % (forecast_type_id, weather_date_id))

        if len(r.dictresult()) == 0:
            q = """INSERT
                   INTO forecasts (forecast_type_id, weather_date_id, forecast_time_id, import_time_id, wind_speed, wind_speed_mph, irradiance)
                   VALUES (%s, %s, %s, %s, %s, %s, %s)""" % (forecast_type_id
                                           , weather_date_id
                                           , forecast_time_id
                                           , import_time_id
                                           , speed_ms
                                           , speed_mph
                                           , irradiance
                                             )
            self.c.query(q)

        # Get the id of the forecast just inserted
        r  = self.c.query('SELECT id from forecasts ORDER BY id DESC LIMIT 1')
        return r.dictresult()[0]["id"]

    def addForecastByFrequency(self, id, value):
        """
        Intelligent insert into the forcast_by_frequency table from the
        data dictionary
        by checking frequency and forecast id.
        """
        for i, (freq, tau, tAtm) in enumerate(zip(value['freqs']
                                          , value['tauCleo']
                                          , value['tAtmCleo']
                                        )):
            r = self.c.query("""SELECT opacity, tsys
                                FROM forecast_by_frequency
                                WHERE forecast_id = %s AND frequency = %s
                             """ % (id, freq))
            if len(r.dictresult()) == 0:
                # tsys = tAtm from Cleo
                q = """INSERT
                       INTO forecast_by_frequency (frequency, opacity, tsys, forecast_id)
                       VALUES(%s, %s, %s, %s)""" % (freq, tau, tAtm, id)
                self.c.query(q)

    def insert(self):
        "From data dictionary into database."

        self.reportLine("Inserting data for forecast %s\n" % self.forecast_time)

        # uncomment this line if you're developing and feeling paranoid
        #assert self.dbname != "weather"          
        self.c = pg.connect(user = "******", dbname = self.dbname, port = settings.DATABASE_PORT)

        # for the data we are inserting, record what forecast_time
        # this is for, and when the import was run.
        forecast_time_id = self.addForecastTime(self.forecast_time)
        import_time_id   = self.addImportTime(self.import_time)

        for timestamp, value in self.data:
            forecast_type_id = value['forecast_type_id']
            if value.has_key('tauCleo') and \
               value.has_key('tSysCleo') and \
               value.has_key('tAtmCleo'):
                if forecast_type_id is None:
                    continue

                self.reportLine("Inserting weather for %s: %5.2f, %5.2f\n" % \
                    (timestamp, value['speed_mph'], value['irradiance']))

                weather_dates_id = self.addWeatherDate(timestamp)
                forecast_id = self.addForecast(forecast_type_id
                                             , weather_dates_id
                                             , forecast_time_id
                                             , import_time_id
                                             , value)
                self.addForecastByFrequency(forecast_id, value)
            else:
                self.reportLine("ERROR: Got wind but not atmosphere forecasts for %s\n" % timestamp)

        self.c.close()

    def findForecastFiles(self):
        """
        Finds the files that we would like to import, based off the 
        assumption that they are the most recent ones in our path.
        """

        # where are the files?  See the cleo help:
        # "The results of this program are a set of files that are 
        # placed in a newly-created subdirectory of the current dir."
        # but we can override it with the path member
        print "self.path: ", self.path

        # the last two most recent dirs are the results from our two
        # commands we fired off in getWeather()
        f1, f2 = sorted([d for d in listdir(self.path) \
            if "Forecasts" in d])[-2:]

        # the frequency dependent realted stuff was written first    
        atmFile =  self.path + "/" + f1 + '/time_avrg' + \
            f1[9:] + '.txt'

        # then came the 'ground' or wind speed stuff     
        windFile = self.path + "/" + f2 + '/time_avrg' + f2[9:] + '.txt'
        
        # Check the FileList* files in each dir for FT's.
        # Story: https://www.pivotaltracker.com/story/show/14224103

        return atmFile, windFile

    def reportToFile(self, filename = None):

        # make the filename unique by adding the timestamp (import time)
        timeStr = datetime.strftime(self.import_time, "%Y_%m_%d_%H_%M_%S")
        if filename is None:
            filename = "CleoDBImport"
        filepath = "%s_%s.txt" % (filename, timeStr)

        # write all the report lines to a file
        f = open(filepath, 'w')
        lines = [line + "\n" for line in self.report]
        f.writelines(lines)
        # add what files were used
        fileLines = ["FILES:\n"]
        for fileType in ['atmFile', 'windFile']:
            file = self.files[fileType]
            l = "File %s: %s\n" % (fileType, file)
            fileLines.append(l)
        f.writelines(fileLines)
        f.close()

    def performImport(self):
        """
        Higher level function that performs all the steps for importing
        new forecast values into the DB:
            * call CLEO forecast commands to produce forecast files
            * reads in and parses these files into a data dict
            * inserts data dict contents into DB
        """

        self.reportLine("Performing import at %s UTC" % datetime.utcnow())

        # call cleo, and find the resulting files
        self.getWeather()
        atmFile, windFile = self.findForecastFiles()

        self.files["atmFile"] = atmFile
        self.files["windFile"] = windFile
        self.reportLine("Reading File atmFile: %s \n" % atmFile)
        self.reportLine("Reading File windFile: %s \n" % windFile)        

        # parse the files
        self.read(atmFile, windFile)

        # insert the parsed data into the DB
        self.insert()

        self.reportToFile()

예제 #5

파일 보기

class CleoDBImport:
    def __init__(self, forecast_time, dbname, path=".", history=False):

        # cleo forecast files get written to our current directory,
        # but we might want to override that for testing
        self.path = path

        self.dbname = dbname
        self.dbimport = DBImport()
        self.forecast_time = forecast_time
        self.history = history

        # take note of when this import is happening
        self.import_time = datetime.utcnow().replace(second=0, microsecond=0)
        # for reporting and diagnostics
        self.files = {}
        self.report = []
        self.quiet = False

        self.initCleoCommandLines()

        # This is a mapping of column names in the wind file header, to the
        # name of where we store it in our internal data dictionary
        # This makes adding new quantities easy.
        self.windFileCols = [("smphTimeList_avrg", "speed_mph"),
                             ("LWDTimeList_avrg", "irradiance")]

    def initCleoCommandLines(self):
        """
        The system calls we make to cleo are rather complicated, and will
        change as what we need changes.
        """

        # here we init settings for the kind of info we're getting from the CLEO
        self.cleoCmdLine = "/home/dss/bin/forecastsCmdLine"

        freqs = range(2, 53)
        freqs.extend(range(54, 122, 2))
        self.atmoFreqs = freqs
        self.numAtmoFreqs = len(freqs)
        freqStr = " ".join(["%d" % f for f in freqs])

        measurements = ["OpacityTime", "TsysTime", "TatmTime"]
        measurementsStr = " ".join(measurements)
        sites = ["Elkins", "HotSprings", "Lewisburg"]
        sitesStr = " ".join(sites)

        if self.history:
            historyOption = '-startTime "%s" -incrTime 1 -mimicHistorical' % \
                datetime.strftime(self.forecast_time, "%m/%d/%Y %H:%M:%S")
        else:
            historyOption = ''

        self.atmoCmdLine = "%s -sites %s -average -calculate %s -freqList %s -elevTsys 90 %s" % \
            (self.cleoCmdLine, sitesStr, measurementsStr, freqStr, historyOption)

        # Then the winds, etc.
        measurements = ["GroundTime", "CloudsPrecipTime"]
        measurementsStr = " ".join(measurements)

        self.windCmdLine = "%s -sites %s -average -calculate %s %s" % \
            (self.cleoCmdLine, sitesStr, measurementsStr, historyOption)

    def reportLine(self, line):
        if not self.quiet:
            print line
        self.report.append(line)

    def mph2mps(self, speed):
        return speed / 2.237

    def getForecastTypeId(self, delta):
        """
        Translates a time difference in hours into a database-ready,
        backward-compatible integer identifier, e.g., 1-8 or 9-24 or ...
        """
        retval = int(delta) / FORECASTDELTA + SIXDELTASTART
        # Note: perhaps retval < SIXDELTASTART always implies SIXDELTASTART?
        return retval if SIXDELTASTART <= retval < MAXFORECASTTYPE else None

    def getForecastTypeIdFromTimestamp(self, timestamp):
        """
        Translates a datetime using the forecast time into a database-ready,
        backward-compatible integer identifier, e.g., 1-8 or 9-24 or ...
        """
        if timestamp >= self.forecast_time:
            td = timestamp - self.forecast_time
            return self.getForecastTypeId(td.days * 24. + td.seconds / 3600.)
        else:
            return SIXDELTASTART

    def getWeather(self):
        "Make actual calls to cleo to populate weather data files."

        print self.atmoCmdLine
        system(self.atmoCmdLine)

        print self.windCmdLine
        system(self.windCmdLine)

        # where are the files?  See the cleo help:
        # "The results of this program are a set of files that are
        # placed in a newly-created subdirectory of the current dir."

    def read(self, forecast_file, wind_file):
        """
        Given the existence of cleo data files, parse the files into a
        data dictionary.
        """

        # init the data dictionary we will be writting to
        self.data = {}

        # we use a similar method for parsing each file, but there are enough
        # exceptions to warrant separate functions
        self.readWindFile(wind_file)
        self.readAtmoFile(forecast_file)

        # Note: we are changing our dictionary to an ordered list
        # why not just create the new ordered list?
        # i.e., self.data[time index][0] = timestamp
        # or self.data[time index][1][keyword][maybe freq] = value
        # This works, and performance isn't an issue.
        self.data = [(timestamp, values) \
            for timestamp, values in self.data.items()]
        self.data.sort(key=lambda x: x[0])

    def readWindFile(self, file):
        """
        Parsing this file is straight forward: we'll need the timestamp
        and *scalar* quantities from each row.
        """

        # read cleo forecast (ground)
        print 'Process cleo forecast data (ground) ...', file
        f = open(file, 'r')
        lines = f.readlines()
        header = lines[0]
        assert header.strip() == windFileHeader.strip()

        for line in lines[1:]:
            row = line.split(' ')
            timestamp = TimeAgent.hour(TimeAgent.mjd2dt(float(row[0])))  #mjd
            self.data[timestamp] = {}

            # what forecast type will this be?
            self.data[timestamp]['forecast_type_id'] = \
                self.getForecastTypeIdFromTimestamp(timestamp)

            self.readWindFileValues(header, timestamp, row)

            # Note: we'll stop doing this eventually, but for now:
            # need to insert a corrected wind speed into the DB.
            speed_mph = self.data[timestamp]['speed_mph']
            self.data[timestamp]['speed_ms'] = \
                self.dbimport.correctWindSpeed(timestamp
                                             , self.mph2mps(speed_mph))

        f.close()

    def readWindFileValues(self, header, timestamp, row):
        "Uses the mapping of file headers to data dict to grab values"

        #Ex: self.windFileCols = [("smphTimeList_avrg", "speed_mph")]

        for colName, dataName in self.windFileCols:
            col = header.split(' ').index(colName)
            self.data[timestamp][dataName] = float(row[col])

    def readAtmoFile(self, file):
        """
        Parsing this file is more complicated, because each row contains
        *vector* quantities.
        """

        # read cleo forecast (atmosphere)
        print 'Process cleo forecast data (atmosphere) ... ', file
        f = open(file, 'r')
        lines = f.readlines()
        header = lines[0]
        assert header.strip() == freqFileHeader.strip()
        first = lines[0].split(' ')
        lines = lines[1:]
        for line in lines:
            row = line.split(' ')
            timestamp = TimeAgent.hour(TimeAgent.mjd2dt(float(row[0])))  #mjdt1
            if not self.data.has_key(timestamp):
                self.reportLine("ERROR: No wind data for %s\n" % timestamp)
                continue
            # frequencies
            self.data[timestamp]['freqs'] = []
            # OpacityTime<freq>List_avrg
            self.data[timestamp]['tauCleo'] = []
            # TsysTime<freq>List_avrg
            self.data[timestamp]['tSysCleo'] = []
            # TatmTime<freq>List_avrg
            self.data[timestamp]['tAtmCleo'] = []
            num = self.numAtmoFreqs
            for i in range(num):
                #print first[ifreq+1], first[ifreq+51], first[ifreq+101]
                self.data[timestamp]['freqs'].append(self.atmoFreqs[i])
                self.data[timestamp]['tauCleo'].append(float(row[i + 1]))
                self.data[timestamp]['tSysCleo'].append(float(row[i + num +
                                                                  1]))
                self.data[timestamp]['tAtmCleo'].append(
                    float(row[i + (num * 2) + 1]))
        f.close()

    def addTimeToDB(self, timestamp, table):
        """
        Searches for given timestamp value in the given 
        table.  If it doesn't exist, create it.
        In any case returns it's ID.
        """

        select_query = "SELECT id FROM %s WHERE date = '%s'" % \
            (table, timestamp)
        insert_query = "INSERT INTO %s (date) VALUES ('%s')" % \
            (table, timestamp)

        # timestamp already exists?
        r = self.c.query(select_query)
        if len(r.dictresult()) == 0:
            # it doesn't, insert it
            self.c.query(insert_query)
            # now get the id of what we just created
            r = self.c.query(select_query)

        return r.dictresult()[0]["id"]

    def addForecastTime(self, timestamp):
        return self.addTimeToDB(timestamp, "forecast_times")

    def addImportTime(self, timestamp):
        return self.addTimeToDB(timestamp, "import_times")

    def addWeatherDate(self, timestamp):
        """
        Searches for given timestamp value in the weather_dates
        table.  If it doesn't exist, create it.
        In any case returns it's ID.
        """

        # look to see if this timestamp already exists
        r = self.c.query("SELECT id FROM weather_dates WHERE date = '%s'" %
                         timestamp)

        if len(r.dictresult()) == 0:
            # it doesn't, insert it
            self.c.query("INSERT INTO weather_dates (date) VALUES ('%s')" %
                         timestamp)
            # now get the id of what we just created
            r = \
                self.c.query("SELECT id FROM weather_dates WHERE date = '%s'" % timestamp)

        return r.dictresult()[0]["id"]

    def addForecast(self, forecast_type_id, weather_date_id, forecast_time_id,
                    import_time_id, value):
        """
        Intelligent insert into the forcast table from the data dictionary
        by checking forecast type and timestamp.
        """
        speed_mph = value['speed_mph']
        speed_ms = value['speed_ms']
        irradiance = value['irradiance']
        r = self.c.query("""SELECT wind_speed
                            FROM forecasts
                            WHERE forecast_type_id = %s and weather_date_id = %s
                         """ % (forecast_type_id, weather_date_id))

        if len(r.dictresult()) == 0:
            q = """INSERT
                   INTO forecasts (forecast_type_id, weather_date_id, forecast_time_id, import_time_id, wind_speed, wind_speed_mph, irradiance)
                   VALUES (%s, %s, %s, %s, %s, %s, %s)""" % (
                forecast_type_id, weather_date_id, forecast_time_id,
                import_time_id, speed_ms, speed_mph, irradiance)
            self.c.query(q)

        # Get the id of the forecast just inserted
        r = self.c.query('SELECT id from forecasts ORDER BY id DESC LIMIT 1')
        return r.dictresult()[0]["id"]

    def addForecastByFrequency(self, id, value):
        """
        Intelligent insert into the forcast_by_frequency table from the
        data dictionary
        by checking frequency and forecast id.
        """
        for i, (freq, tau, tAtm) in enumerate(
                zip(value['freqs'], value['tauCleo'], value['tAtmCleo'])):
            r = self.c.query("""SELECT opacity, tsys
                                FROM forecast_by_frequency
                                WHERE forecast_id = %s AND frequency = %s
                             """ % (id, freq))
            if len(r.dictresult()) == 0:
                # tsys = tAtm from Cleo
                q = """INSERT
                       INTO forecast_by_frequency (frequency, opacity, tsys, forecast_id)
                       VALUES(%s, %s, %s, %s)""" % (freq, tau, tAtm, id)
                self.c.query(q)

    def insert(self):
        "From data dictionary into database."

        self.reportLine("Inserting data for forecast %s\n" %
                        self.forecast_time)

        # uncomment this line if you're developing and feeling paranoid
        #assert self.dbname != "weather"
        self.c = pg.connect(user="******",
                            dbname=self.dbname,
                            port=settings.DATABASE_PORT)

        # for the data we are inserting, record what forecast_time
        # this is for, and when the import was run.
        forecast_time_id = self.addForecastTime(self.forecast_time)
        import_time_id = self.addImportTime(self.import_time)

        for timestamp, value in self.data:
            forecast_type_id = value['forecast_type_id']
            if value.has_key('tauCleo') and \
               value.has_key('tSysCleo') and \
               value.has_key('tAtmCleo'):
                if forecast_type_id is None:
                    continue

                self.reportLine("Inserting weather for %s: %5.2f, %5.2f\n" % \
                    (timestamp, value['speed_mph'], value['irradiance']))

                weather_dates_id = self.addWeatherDate(timestamp)
                forecast_id = self.addForecast(forecast_type_id,
                                               weather_dates_id,
                                               forecast_time_id,
                                               import_time_id, value)
                self.addForecastByFrequency(forecast_id, value)
            else:
                self.reportLine(
                    "ERROR: Got wind but not atmosphere forecasts for %s\n" %
                    timestamp)

        self.c.close()

    def findForecastFiles(self):
        """
        Finds the files that we would like to import, based off the 
        assumption that they are the most recent ones in our path.
        """

        # where are the files?  See the cleo help:
        # "The results of this program are a set of files that are
        # placed in a newly-created subdirectory of the current dir."
        # but we can override it with the path member
        print "self.path: ", self.path

        # the last two most recent dirs are the results from our two
        # commands we fired off in getWeather()
        f1, f2 = sorted([d for d in listdir(self.path) \
            if "Forecasts" in d])[-2:]

        # the frequency dependent realted stuff was written first
        atmFile =  self.path + "/" + f1 + '/time_avrg' + \
            f1[9:] + '.txt'

        # then came the 'ground' or wind speed stuff
        windFile = self.path + "/" + f2 + '/time_avrg' + f2[9:] + '.txt'

        # Check the FileList* files in each dir for FT's.
        # Story: https://www.pivotaltracker.com/story/show/14224103

        return atmFile, windFile

    def reportToFile(self, filename=None):

        # make the filename unique by adding the timestamp (import time)
        timeStr = datetime.strftime(self.import_time, "%Y_%m_%d_%H_%M_%S")
        if filename is None:
            filename = "CleoDBImport"
        filepath = "%s_%s.txt" % (filename, timeStr)

        # write all the report lines to a file
        f = open(filepath, 'w')
        lines = [line + "\n" for line in self.report]
        f.writelines(lines)
        # add what files were used
        fileLines = ["FILES:\n"]
        for fileType in ['atmFile', 'windFile']:
            file = self.files[fileType]
            l = "File %s: %s\n" % (fileType, file)
            fileLines.append(l)
        f.writelines(fileLines)
        f.close()

    def performImport(self):
        """
        Higher level function that performs all the steps for importing
        new forecast values into the DB:
            * call CLEO forecast commands to produce forecast files
            * reads in and parses these files into a data dict
            * inserts data dict contents into DB
        """

        self.reportLine("Performing import at %s UTC" % datetime.utcnow())

        # call cleo, and find the resulting files
        self.getWeather()
        atmFile, windFile = self.findForecastFiles()

        self.files["atmFile"] = atmFile
        self.files["windFile"] = windFile
        self.reportLine("Reading File atmFile: %s \n" % atmFile)
        self.reportLine("Reading File windFile: %s \n" % windFile)

        # parse the files
        self.read(atmFile, windFile)

        # insert the parsed data into the DB
        self.insert()

        self.reportToFile()