def __init__(self, forecast_time, dbname, path=".", history=False): # cleo forecast files get written to our current directory, # but we might want to override that for testing self.path = path self.dbname = dbname self.dbimport = DBImport() self.forecast_time = forecast_time self.history = history # take note of when this import is happening self.import_time = datetime.utcnow().replace(second=0, microsecond=0) # for reporting and diagnostics self.files = {} self.report = [] self.quiet = False self.initCleoCommandLines() # This is a mapping of column names in the wind file header, to the # name of where we store it in our internal data dictionary # This makes adding new quantities easy. self.windFileCols = [("smphTimeList_avrg", "speed_mph"), ("LWDTimeList_avrg", "irradiance")]
def __init__(self, forecast_time, dbname, path = ".", history = False): # cleo forecast files get written to our current directory, # but we might want to override that for testing self.path = path self.dbname = dbname self.dbimport = DBImport() self.forecast_time = forecast_time self.history = history # take note of when this import is happening self.import_time = datetime.utcnow().replace(second = 0 , microsecond = 0) # for reporting and diagnostics self.files = {} self.report = [] self.quiet = False self.initCleoCommandLines() # This is a mapping of column names in the wind file header, to the # name of where we store it in our internal data dictionary # This makes adding new quantities easy. self.windFileCols = [("smphTimeList_avrg", "speed_mph") , ("LWDTimeList_avrg", "irradiance") ]
q = """INSERT INTO forecast_by_frequency (frequency, opacity, tsys, forecast_id) VALUES(%s, %s, %s, %s)""" % (frequency, opacity, tsys, id) c.query(q) def processFile(filename, forecast, date): for line in open(filename): xs = line.split() fill(xs, forecast, date) date += timedelta(hours=1) # ================================ program ================================ dbimport = DBImport() PREFIX = 'simulateTime_2006_' SUFFIXES = ['0-11', '12-23', '24-35', '36-47', '48-59'] STARTDATE = datetime(2006, 1, 1) if len(sys.argv) != 3: print "populateForecastTables.py <database name> <read_directory>" exit() database = sys.argv[1] read_dir = sys.argv[2] c = pg.connect(user="******", dbname=database, port=settings.DATABASE_PORT) for suffix in SUFFIXES:
class CleoDBImport: def __init__(self, forecast_time, dbname, path = ".", history = False): # cleo forecast files get written to our current directory, # but we might want to override that for testing self.path = path self.dbname = dbname self.dbimport = DBImport() self.forecast_time = forecast_time self.history = history # take note of when this import is happening self.import_time = datetime.utcnow().replace(second = 0 , microsecond = 0) # for reporting and diagnostics self.files = {} self.report = [] self.quiet = False self.initCleoCommandLines() # This is a mapping of column names in the wind file header, to the # name of where we store it in our internal data dictionary # This makes adding new quantities easy. self.windFileCols = [("smphTimeList_avrg", "speed_mph") , ("LWDTimeList_avrg", "irradiance") ] def initCleoCommandLines(self): """ The system calls we make to cleo are rather complicated, and will change as what we need changes. """ # here we init settings for the kind of info we're getting from the CLEO self.cleoCmdLine = "/home/dss/bin/forecastsCmdLine" freqs = range(2, 53) freqs.extend(range(54, 122, 2)) self.atmoFreqs = freqs self.numAtmoFreqs = len(freqs) freqStr = " ".join(["%d" % f for f in freqs]) measurements = ["OpacityTime", "TsysTime", "TatmTime"] measurementsStr = " ".join(measurements) sites = ["Elkins", "HotSprings", "Lewisburg"] sitesStr = " ".join(sites) if self.history: historyOption = '-startTime "%s" -incrTime 1 -mimicHistorical' % \ datetime.strftime(self.forecast_time, "%m/%d/%Y %H:%M:%S") else: historyOption = '' self.atmoCmdLine = "%s -sites %s -average -calculate %s -freqList %s -elevTsys 90 %s" % \ (self.cleoCmdLine, sitesStr, measurementsStr, freqStr, historyOption) # Then the winds, etc. measurements = ["GroundTime", "CloudsPrecipTime"] measurementsStr = " ".join(measurements) self.windCmdLine = "%s -sites %s -average -calculate %s %s" % \ (self.cleoCmdLine, sitesStr, measurementsStr, historyOption) def reportLine(self, line): if not self.quiet: print line self.report.append(line) def mph2mps(self, speed): return speed / 2.237 def getForecastTypeId(self, delta): """ Translates a time difference in hours into a database-ready, backward-compatible integer identifier, e.g., 1-8 or 9-24 or ... """ retval = int(delta) / FORECASTDELTA + SIXDELTASTART # Note: perhaps retval < SIXDELTASTART always implies SIXDELTASTART? return retval if SIXDELTASTART <= retval < MAXFORECASTTYPE else None def getForecastTypeIdFromTimestamp(self, timestamp): """ Translates a datetime using the forecast time into a database-ready, backward-compatible integer identifier, e.g., 1-8 or 9-24 or ... """ if timestamp >= self.forecast_time: td = timestamp - self.forecast_time return self.getForecastTypeId(td.days * 24. + td.seconds / 3600.) else: return SIXDELTASTART def getWeather(self): "Make actual calls to cleo to populate weather data files." print self.atmoCmdLine system(self.atmoCmdLine) print self.windCmdLine system(self.windCmdLine) # where are the files? See the cleo help: # "The results of this program are a set of files that are # placed in a newly-created subdirectory of the current dir." def read(self, forecast_file, wind_file): """ Given the existence of cleo data files, parse the files into a data dictionary. """ # init the data dictionary we will be writting to self.data = {} # we use a similar method for parsing each file, but there are enough # exceptions to warrant separate functions self.readWindFile(wind_file) self.readAtmoFile(forecast_file) # Note: we are changing our dictionary to an ordered list # why not just create the new ordered list? # i.e., self.data[time index][0] = timestamp # or self.data[time index][1][keyword][maybe freq] = value # This works, and performance isn't an issue. self.data = [(timestamp, values) \ for timestamp, values in self.data.items()] self.data.sort(key = lambda x: x[0]) def readWindFile(self, file): """ Parsing this file is straight forward: we'll need the timestamp and *scalar* quantities from each row. """ # read cleo forecast (ground) print 'Process cleo forecast data (ground) ...', file f = open(file, 'r') lines = f.readlines() header = lines[0] assert header.strip() == windFileHeader.strip() for line in lines[1:]: row = line.split(' ') timestamp = TimeAgent.hour(TimeAgent.mjd2dt(float(row[0]))) #mjd self.data[timestamp] = {} # what forecast type will this be? self.data[timestamp]['forecast_type_id'] = \ self.getForecastTypeIdFromTimestamp(timestamp) self.readWindFileValues(header, timestamp, row) # Note: we'll stop doing this eventually, but for now: # need to insert a corrected wind speed into the DB. speed_mph = self.data[timestamp]['speed_mph'] self.data[timestamp]['speed_ms'] = \ self.dbimport.correctWindSpeed(timestamp , self.mph2mps(speed_mph)) f.close() def readWindFileValues(self, header, timestamp, row): "Uses the mapping of file headers to data dict to grab values" #Ex: self.windFileCols = [("smphTimeList_avrg", "speed_mph")] for colName, dataName in self.windFileCols: col = header.split(' ').index(colName) self.data[timestamp][dataName] = float(row[col]) def readAtmoFile(self, file): """ Parsing this file is more complicated, because each row contains *vector* quantities. """ # read cleo forecast (atmosphere) print 'Process cleo forecast data (atmosphere) ... ', file f = open(file, 'r') lines = f.readlines() header = lines[0] assert header.strip() == freqFileHeader.strip() first = lines[0].split(' ') lines = lines[1:] for line in lines: row = line.split(' ') timestamp = TimeAgent.hour(TimeAgent.mjd2dt(float(row[0]))) #mjdt1 if not self.data.has_key(timestamp): self.reportLine("ERROR: No wind data for %s\n" % timestamp) continue # frequencies self.data[timestamp]['freqs'] = [] # OpacityTime<freq>List_avrg self.data[timestamp]['tauCleo'] = [] # TsysTime<freq>List_avrg self.data[timestamp]['tSysCleo'] = [] # TatmTime<freq>List_avrg self.data[timestamp]['tAtmCleo'] = [] num = self.numAtmoFreqs for i in range(num): #print first[ifreq+1], first[ifreq+51], first[ifreq+101] self.data[timestamp]['freqs'].append(self.atmoFreqs[i]) self.data[timestamp]['tauCleo'].append(float(row[i+1])) self.data[timestamp]['tSysCleo'].append(float(row[i+num+1])) self.data[timestamp]['tAtmCleo'].append(float(row[i+(num*2)+1])) f.close() def addTimeToDB(self, timestamp, table): """ Searches for given timestamp value in the given table. If it doesn't exist, create it. In any case returns it's ID. """ select_query = "SELECT id FROM %s WHERE date = '%s'" % \ (table, timestamp) insert_query = "INSERT INTO %s (date) VALUES ('%s')" % \ (table, timestamp) # timestamp already exists? r = self.c.query(select_query) if len(r.dictresult()) == 0: # it doesn't, insert it self.c.query(insert_query) # now get the id of what we just created r = self.c.query(select_query) return r.dictresult()[0]["id"] def addForecastTime(self, timestamp): return self.addTimeToDB(timestamp, "forecast_times") def addImportTime(self, timestamp): return self.addTimeToDB(timestamp, "import_times") def addWeatherDate(self, timestamp): """ Searches for given timestamp value in the weather_dates table. If it doesn't exist, create it. In any case returns it's ID. """ # look to see if this timestamp already exists r = self.c.query("SELECT id FROM weather_dates WHERE date = '%s'" % timestamp) if len(r.dictresult()) == 0: # it doesn't, insert it self.c.query("INSERT INTO weather_dates (date) VALUES ('%s')" % timestamp) # now get the id of what we just created r = \ self.c.query("SELECT id FROM weather_dates WHERE date = '%s'" % timestamp) return r.dictresult()[0]["id"] def addForecast(self , forecast_type_id , weather_date_id , forecast_time_id , import_time_id , value): """ Intelligent insert into the forcast table from the data dictionary by checking forecast type and timestamp. """ speed_mph = value['speed_mph'] speed_ms = value['speed_ms'] irradiance = value['irradiance'] r = self.c.query("""SELECT wind_speed FROM forecasts WHERE forecast_type_id = %s and weather_date_id = %s """ % (forecast_type_id, weather_date_id)) if len(r.dictresult()) == 0: q = """INSERT INTO forecasts (forecast_type_id, weather_date_id, forecast_time_id, import_time_id, wind_speed, wind_speed_mph, irradiance) VALUES (%s, %s, %s, %s, %s, %s, %s)""" % (forecast_type_id , weather_date_id , forecast_time_id , import_time_id , speed_ms , speed_mph , irradiance ) self.c.query(q) # Get the id of the forecast just inserted r = self.c.query('SELECT id from forecasts ORDER BY id DESC LIMIT 1') return r.dictresult()[0]["id"] def addForecastByFrequency(self, id, value): """ Intelligent insert into the forcast_by_frequency table from the data dictionary by checking frequency and forecast id. """ for i, (freq, tau, tAtm) in enumerate(zip(value['freqs'] , value['tauCleo'] , value['tAtmCleo'] )): r = self.c.query("""SELECT opacity, tsys FROM forecast_by_frequency WHERE forecast_id = %s AND frequency = %s """ % (id, freq)) if len(r.dictresult()) == 0: # tsys = tAtm from Cleo q = """INSERT INTO forecast_by_frequency (frequency, opacity, tsys, forecast_id) VALUES(%s, %s, %s, %s)""" % (freq, tau, tAtm, id) self.c.query(q) def insert(self): "From data dictionary into database." self.reportLine("Inserting data for forecast %s\n" % self.forecast_time) # uncomment this line if you're developing and feeling paranoid #assert self.dbname != "weather" self.c = pg.connect(user = "******", dbname = self.dbname, port = settings.DATABASE_PORT) # for the data we are inserting, record what forecast_time # this is for, and when the import was run. forecast_time_id = self.addForecastTime(self.forecast_time) import_time_id = self.addImportTime(self.import_time) for timestamp, value in self.data: forecast_type_id = value['forecast_type_id'] if value.has_key('tauCleo') and \ value.has_key('tSysCleo') and \ value.has_key('tAtmCleo'): if forecast_type_id is None: continue self.reportLine("Inserting weather for %s: %5.2f, %5.2f\n" % \ (timestamp, value['speed_mph'], value['irradiance'])) weather_dates_id = self.addWeatherDate(timestamp) forecast_id = self.addForecast(forecast_type_id , weather_dates_id , forecast_time_id , import_time_id , value) self.addForecastByFrequency(forecast_id, value) else: self.reportLine("ERROR: Got wind but not atmosphere forecasts for %s\n" % timestamp) self.c.close() def findForecastFiles(self): """ Finds the files that we would like to import, based off the assumption that they are the most recent ones in our path. """ # where are the files? See the cleo help: # "The results of this program are a set of files that are # placed in a newly-created subdirectory of the current dir." # but we can override it with the path member print "self.path: ", self.path # the last two most recent dirs are the results from our two # commands we fired off in getWeather() f1, f2 = sorted([d for d in listdir(self.path) \ if "Forecasts" in d])[-2:] # the frequency dependent realted stuff was written first atmFile = self.path + "/" + f1 + '/time_avrg' + \ f1[9:] + '.txt' # then came the 'ground' or wind speed stuff windFile = self.path + "/" + f2 + '/time_avrg' + f2[9:] + '.txt' # Check the FileList* files in each dir for FT's. # Story: https://www.pivotaltracker.com/story/show/14224103 return atmFile, windFile def reportToFile(self, filename = None): # make the filename unique by adding the timestamp (import time) timeStr = datetime.strftime(self.import_time, "%Y_%m_%d_%H_%M_%S") if filename is None: filename = "CleoDBImport" filepath = "%s_%s.txt" % (filename, timeStr) # write all the report lines to a file f = open(filepath, 'w') lines = [line + "\n" for line in self.report] f.writelines(lines) # add what files were used fileLines = ["FILES:\n"] for fileType in ['atmFile', 'windFile']: file = self.files[fileType] l = "File %s: %s\n" % (fileType, file) fileLines.append(l) f.writelines(fileLines) f.close() def performImport(self): """ Higher level function that performs all the steps for importing new forecast values into the DB: * call CLEO forecast commands to produce forecast files * reads in and parses these files into a data dict * inserts data dict contents into DB """ self.reportLine("Performing import at %s UTC" % datetime.utcnow()) # call cleo, and find the resulting files self.getWeather() atmFile, windFile = self.findForecastFiles() self.files["atmFile"] = atmFile self.files["windFile"] = windFile self.reportLine("Reading File atmFile: %s \n" % atmFile) self.reportLine("Reading File windFile: %s \n" % windFile) # parse the files self.read(atmFile, windFile) # insert the parsed data into the DB self.insert() self.reportToFile()
class CleoDBImport: def __init__(self, forecast_time, dbname, path=".", history=False): # cleo forecast files get written to our current directory, # but we might want to override that for testing self.path = path self.dbname = dbname self.dbimport = DBImport() self.forecast_time = forecast_time self.history = history # take note of when this import is happening self.import_time = datetime.utcnow().replace(second=0, microsecond=0) # for reporting and diagnostics self.files = {} self.report = [] self.quiet = False self.initCleoCommandLines() # This is a mapping of column names in the wind file header, to the # name of where we store it in our internal data dictionary # This makes adding new quantities easy. self.windFileCols = [("smphTimeList_avrg", "speed_mph"), ("LWDTimeList_avrg", "irradiance")] def initCleoCommandLines(self): """ The system calls we make to cleo are rather complicated, and will change as what we need changes. """ # here we init settings for the kind of info we're getting from the CLEO self.cleoCmdLine = "/home/dss/bin/forecastsCmdLine" freqs = range(2, 53) freqs.extend(range(54, 122, 2)) self.atmoFreqs = freqs self.numAtmoFreqs = len(freqs) freqStr = " ".join(["%d" % f for f in freqs]) measurements = ["OpacityTime", "TsysTime", "TatmTime"] measurementsStr = " ".join(measurements) sites = ["Elkins", "HotSprings", "Lewisburg"] sitesStr = " ".join(sites) if self.history: historyOption = '-startTime "%s" -incrTime 1 -mimicHistorical' % \ datetime.strftime(self.forecast_time, "%m/%d/%Y %H:%M:%S") else: historyOption = '' self.atmoCmdLine = "%s -sites %s -average -calculate %s -freqList %s -elevTsys 90 %s" % \ (self.cleoCmdLine, sitesStr, measurementsStr, freqStr, historyOption) # Then the winds, etc. measurements = ["GroundTime", "CloudsPrecipTime"] measurementsStr = " ".join(measurements) self.windCmdLine = "%s -sites %s -average -calculate %s %s" % \ (self.cleoCmdLine, sitesStr, measurementsStr, historyOption) def reportLine(self, line): if not self.quiet: print line self.report.append(line) def mph2mps(self, speed): return speed / 2.237 def getForecastTypeId(self, delta): """ Translates a time difference in hours into a database-ready, backward-compatible integer identifier, e.g., 1-8 or 9-24 or ... """ retval = int(delta) / FORECASTDELTA + SIXDELTASTART # Note: perhaps retval < SIXDELTASTART always implies SIXDELTASTART? return retval if SIXDELTASTART <= retval < MAXFORECASTTYPE else None def getForecastTypeIdFromTimestamp(self, timestamp): """ Translates a datetime using the forecast time into a database-ready, backward-compatible integer identifier, e.g., 1-8 or 9-24 or ... """ if timestamp >= self.forecast_time: td = timestamp - self.forecast_time return self.getForecastTypeId(td.days * 24. + td.seconds / 3600.) else: return SIXDELTASTART def getWeather(self): "Make actual calls to cleo to populate weather data files." print self.atmoCmdLine system(self.atmoCmdLine) print self.windCmdLine system(self.windCmdLine) # where are the files? See the cleo help: # "The results of this program are a set of files that are # placed in a newly-created subdirectory of the current dir." def read(self, forecast_file, wind_file): """ Given the existence of cleo data files, parse the files into a data dictionary. """ # init the data dictionary we will be writting to self.data = {} # we use a similar method for parsing each file, but there are enough # exceptions to warrant separate functions self.readWindFile(wind_file) self.readAtmoFile(forecast_file) # Note: we are changing our dictionary to an ordered list # why not just create the new ordered list? # i.e., self.data[time index][0] = timestamp # or self.data[time index][1][keyword][maybe freq] = value # This works, and performance isn't an issue. self.data = [(timestamp, values) \ for timestamp, values in self.data.items()] self.data.sort(key=lambda x: x[0]) def readWindFile(self, file): """ Parsing this file is straight forward: we'll need the timestamp and *scalar* quantities from each row. """ # read cleo forecast (ground) print 'Process cleo forecast data (ground) ...', file f = open(file, 'r') lines = f.readlines() header = lines[0] assert header.strip() == windFileHeader.strip() for line in lines[1:]: row = line.split(' ') timestamp = TimeAgent.hour(TimeAgent.mjd2dt(float(row[0]))) #mjd self.data[timestamp] = {} # what forecast type will this be? self.data[timestamp]['forecast_type_id'] = \ self.getForecastTypeIdFromTimestamp(timestamp) self.readWindFileValues(header, timestamp, row) # Note: we'll stop doing this eventually, but for now: # need to insert a corrected wind speed into the DB. speed_mph = self.data[timestamp]['speed_mph'] self.data[timestamp]['speed_ms'] = \ self.dbimport.correctWindSpeed(timestamp , self.mph2mps(speed_mph)) f.close() def readWindFileValues(self, header, timestamp, row): "Uses the mapping of file headers to data dict to grab values" #Ex: self.windFileCols = [("smphTimeList_avrg", "speed_mph")] for colName, dataName in self.windFileCols: col = header.split(' ').index(colName) self.data[timestamp][dataName] = float(row[col]) def readAtmoFile(self, file): """ Parsing this file is more complicated, because each row contains *vector* quantities. """ # read cleo forecast (atmosphere) print 'Process cleo forecast data (atmosphere) ... ', file f = open(file, 'r') lines = f.readlines() header = lines[0] assert header.strip() == freqFileHeader.strip() first = lines[0].split(' ') lines = lines[1:] for line in lines: row = line.split(' ') timestamp = TimeAgent.hour(TimeAgent.mjd2dt(float(row[0]))) #mjdt1 if not self.data.has_key(timestamp): self.reportLine("ERROR: No wind data for %s\n" % timestamp) continue # frequencies self.data[timestamp]['freqs'] = [] # OpacityTime<freq>List_avrg self.data[timestamp]['tauCleo'] = [] # TsysTime<freq>List_avrg self.data[timestamp]['tSysCleo'] = [] # TatmTime<freq>List_avrg self.data[timestamp]['tAtmCleo'] = [] num = self.numAtmoFreqs for i in range(num): #print first[ifreq+1], first[ifreq+51], first[ifreq+101] self.data[timestamp]['freqs'].append(self.atmoFreqs[i]) self.data[timestamp]['tauCleo'].append(float(row[i + 1])) self.data[timestamp]['tSysCleo'].append(float(row[i + num + 1])) self.data[timestamp]['tAtmCleo'].append( float(row[i + (num * 2) + 1])) f.close() def addTimeToDB(self, timestamp, table): """ Searches for given timestamp value in the given table. If it doesn't exist, create it. In any case returns it's ID. """ select_query = "SELECT id FROM %s WHERE date = '%s'" % \ (table, timestamp) insert_query = "INSERT INTO %s (date) VALUES ('%s')" % \ (table, timestamp) # timestamp already exists? r = self.c.query(select_query) if len(r.dictresult()) == 0: # it doesn't, insert it self.c.query(insert_query) # now get the id of what we just created r = self.c.query(select_query) return r.dictresult()[0]["id"] def addForecastTime(self, timestamp): return self.addTimeToDB(timestamp, "forecast_times") def addImportTime(self, timestamp): return self.addTimeToDB(timestamp, "import_times") def addWeatherDate(self, timestamp): """ Searches for given timestamp value in the weather_dates table. If it doesn't exist, create it. In any case returns it's ID. """ # look to see if this timestamp already exists r = self.c.query("SELECT id FROM weather_dates WHERE date = '%s'" % timestamp) if len(r.dictresult()) == 0: # it doesn't, insert it self.c.query("INSERT INTO weather_dates (date) VALUES ('%s')" % timestamp) # now get the id of what we just created r = \ self.c.query("SELECT id FROM weather_dates WHERE date = '%s'" % timestamp) return r.dictresult()[0]["id"] def addForecast(self, forecast_type_id, weather_date_id, forecast_time_id, import_time_id, value): """ Intelligent insert into the forcast table from the data dictionary by checking forecast type and timestamp. """ speed_mph = value['speed_mph'] speed_ms = value['speed_ms'] irradiance = value['irradiance'] r = self.c.query("""SELECT wind_speed FROM forecasts WHERE forecast_type_id = %s and weather_date_id = %s """ % (forecast_type_id, weather_date_id)) if len(r.dictresult()) == 0: q = """INSERT INTO forecasts (forecast_type_id, weather_date_id, forecast_time_id, import_time_id, wind_speed, wind_speed_mph, irradiance) VALUES (%s, %s, %s, %s, %s, %s, %s)""" % ( forecast_type_id, weather_date_id, forecast_time_id, import_time_id, speed_ms, speed_mph, irradiance) self.c.query(q) # Get the id of the forecast just inserted r = self.c.query('SELECT id from forecasts ORDER BY id DESC LIMIT 1') return r.dictresult()[0]["id"] def addForecastByFrequency(self, id, value): """ Intelligent insert into the forcast_by_frequency table from the data dictionary by checking frequency and forecast id. """ for i, (freq, tau, tAtm) in enumerate( zip(value['freqs'], value['tauCleo'], value['tAtmCleo'])): r = self.c.query("""SELECT opacity, tsys FROM forecast_by_frequency WHERE forecast_id = %s AND frequency = %s """ % (id, freq)) if len(r.dictresult()) == 0: # tsys = tAtm from Cleo q = """INSERT INTO forecast_by_frequency (frequency, opacity, tsys, forecast_id) VALUES(%s, %s, %s, %s)""" % (freq, tau, tAtm, id) self.c.query(q) def insert(self): "From data dictionary into database." self.reportLine("Inserting data for forecast %s\n" % self.forecast_time) # uncomment this line if you're developing and feeling paranoid #assert self.dbname != "weather" self.c = pg.connect(user="******", dbname=self.dbname, port=settings.DATABASE_PORT) # for the data we are inserting, record what forecast_time # this is for, and when the import was run. forecast_time_id = self.addForecastTime(self.forecast_time) import_time_id = self.addImportTime(self.import_time) for timestamp, value in self.data: forecast_type_id = value['forecast_type_id'] if value.has_key('tauCleo') and \ value.has_key('tSysCleo') and \ value.has_key('tAtmCleo'): if forecast_type_id is None: continue self.reportLine("Inserting weather for %s: %5.2f, %5.2f\n" % \ (timestamp, value['speed_mph'], value['irradiance'])) weather_dates_id = self.addWeatherDate(timestamp) forecast_id = self.addForecast(forecast_type_id, weather_dates_id, forecast_time_id, import_time_id, value) self.addForecastByFrequency(forecast_id, value) else: self.reportLine( "ERROR: Got wind but not atmosphere forecasts for %s\n" % timestamp) self.c.close() def findForecastFiles(self): """ Finds the files that we would like to import, based off the assumption that they are the most recent ones in our path. """ # where are the files? See the cleo help: # "The results of this program are a set of files that are # placed in a newly-created subdirectory of the current dir." # but we can override it with the path member print "self.path: ", self.path # the last two most recent dirs are the results from our two # commands we fired off in getWeather() f1, f2 = sorted([d for d in listdir(self.path) \ if "Forecasts" in d])[-2:] # the frequency dependent realted stuff was written first atmFile = self.path + "/" + f1 + '/time_avrg' + \ f1[9:] + '.txt' # then came the 'ground' or wind speed stuff windFile = self.path + "/" + f2 + '/time_avrg' + f2[9:] + '.txt' # Check the FileList* files in each dir for FT's. # Story: https://www.pivotaltracker.com/story/show/14224103 return atmFile, windFile def reportToFile(self, filename=None): # make the filename unique by adding the timestamp (import time) timeStr = datetime.strftime(self.import_time, "%Y_%m_%d_%H_%M_%S") if filename is None: filename = "CleoDBImport" filepath = "%s_%s.txt" % (filename, timeStr) # write all the report lines to a file f = open(filepath, 'w') lines = [line + "\n" for line in self.report] f.writelines(lines) # add what files were used fileLines = ["FILES:\n"] for fileType in ['atmFile', 'windFile']: file = self.files[fileType] l = "File %s: %s\n" % (fileType, file) fileLines.append(l) f.writelines(fileLines) f.close() def performImport(self): """ Higher level function that performs all the steps for importing new forecast values into the DB: * call CLEO forecast commands to produce forecast files * reads in and parses these files into a data dict * inserts data dict contents into DB """ self.reportLine("Performing import at %s UTC" % datetime.utcnow()) # call cleo, and find the resulting files self.getWeather() atmFile, windFile = self.findForecastFiles() self.files["atmFile"] = atmFile self.files["windFile"] = windFile self.reportLine("Reading File atmFile: %s \n" % atmFile) self.reportLine("Reading File windFile: %s \n" % windFile) # parse the files self.read(atmFile, windFile) # insert the parsed data into the DB self.insert() self.reportToFile()