def FindFiles(file_format, path=None, date_min=None, date_max=None):
    """Find the concentration files that match the pattern/format provided.

    Keyword Arguments:

    file_format:*string*
       A format containing wildcards (*) and date indicators,
       i.e. YYYY, YY, MM, DD or JJJ for Julian day

    path:*string*
       Path to look for files (required; raises ValueError when omitted)

    date_min:*Datetime*
       If set, this is the minimum accepted date

    date_max:*Datetime*
       If set, this is the maximum accepted date

    Raises:
       ValueError if no path is given
       TypeError if date_min/date_max are neither None nor date instances

    Returns:

    *list[DataFile]*
       Returns a sorted list of DataFiles
    """
    # BUG FIX: this used to fall back to "self.inputPath", but this is a
    # module-level function with no self -- that branch was a guaranteed
    # NameError.  Fail fast with the intended ValueError instead.
    if path is None:
        raise ValueError("Must provide a path to search")
    if not path.endswith("/"):
        path = path + "/"

    if date_min is not None and not isinstance(date_min, date):
        raise TypeError("Minimum date may either be None or a DateTime")
    if date_max is not None and not isinstance(date_max, date):
        raise TypeError("Maximum date may either be None or a DateTime")

    files = os.listdir(path)

    # Translate the date tokens and wildcards into a regular expression.
    # NOTE: tokens are literal text, so plain str.replace is used rather than
    # re.sub -- "\d" in a re.sub *replacement* string is a bad escape on
    # modern Python.  (The docstring mentions YY, but the original code never
    # supported it; kept as-is for compatibility.)
    reg = file_format
    for token, pattern in (('YYYY', r'\d{4}'),
                           ('MM', r'\d{2}'),
                           ('DD', r'\d{2}'),
                           ('JJJ', r'\d{3}')):
        reg = reg.replace(token, pattern)
    reg = reg.replace('*', '.*')

    matcher = re.compile(reg)

    cfiles = []
    for f in files:
        if matcher.search(f):
            df = DataFile(f, path=path, file_format=file_format)
            df.loadDate()
            # BUG FIX: the old test required BOTH bounds to be set for any
            # file to pass; a single bound silently matched nothing.  Each
            # bound is now applied independently when present.
            if (date_min is None or df.date >= date_min) and \
               (date_max is None or df.date <= date_max):
                cfiles.append(df)

    return sorted(cfiles)
class ForcingValidator:
    """Validates user selections (species, layers, times) against a sample
    I/O Api concentration file, and exposes a few getters for its contents."""

    LAY_SURFACE_NAME = 'Surface'

    # Populated from the sample file in __init__
    ni = None    # number of columns (COL dimension)
    nj = None    # number of rows (ROW dimension)
    nk = None    # number of layers (LAY dimension)
    ns = None    # number of variables (species)
    nt = None    # number of time steps
    conc = None  # the open sample DataFile (None once closed)

    def __init__(self, filename):
        self.conc = DataFile(filename, mode='r', open=True)

        self.ni = self.conc.dimensions['COL']
        self.nj = self.conc.dimensions['ROW']
        self.nk = self.conc.dimensions['LAY']

        # TSTEP is unlimited, so python has problems reading it.
        # So instead we'll examine the shape of a variable.
        # Let's assume TFLAG exists; the first element of its shape is TSTEP.
        shape = self.conc.variables['TFLAG'].shape
        self.nt = shape[0]

        # BUG FIX: this was assigned to a local variable "ns", which left
        # the instance attribute self.ns permanently None.
        self.ns = len(self.conc.variables.keys())

    def close(self):
        """Close the sample file.  Safe to call more than once."""
        # BUG FIX: previously self.conc was only set to None on the IOError
        # path, so __del__ would try to close an already-closed file and a
        # second close() would crash on None.  Now idempotent.
        if self.conc is not None:
            try:
                self.conc.close()
            except IOError:
                # nothing.. it's closed.
                pass
            self.conc = None

    def __del__(self):
        self.close()

    def changeFile(self, newfile):
        """Replace the sample file with a new one."""
        self.close()
        self.conc = DataFile(newfile, mode='r', open=True)

    def getDate(self):
        """Again, not a validator - just a getter.  Useful to know the date
        of the concentration file being used.

        Returns:
           datetime
        """
        self.conc.loadDate()
        return self.conc.date

    def getLayers(self):
        """Return a list of layers.

        This isn't really a validator, but it shares a lot of the
        functionality.  Assumes that there's always a ground layer.

        Returns:
           list of layers (surface name followed by layer numbers as strings)
        """
        num_layers = self.conc.dimensions['LAY']
        layers = [self.LAY_SURFACE_NAME]
        # BUG FIX: "layers += str(l)" extended the list with the string's
        # individual *characters*, which breaks for layer numbers >= 10.
        # NOTE(review): range(2, num_layers) omits the top layer -- this
        # matches the original code, but looks like an off-by-one; confirm.
        for l in range(2, num_layers):
            layers.append(str(l))
        return layers

    def getTimes(self):
        """Return a list of times (hours).

        This isn't really a validator, but it shares a lot of the
        functionality.

        Returns:
           list of times
        """
        shape = self.conc.variables['TFLAG'].shape
        nt = shape[0]
        times = list(range(nt))
        # Cut off the 25th time.
        # NOTE(review): the loop converts all but the last entry to str, so
        # the final element remains an int.  Preserved as-is since the
        # intent ("cut off") is ambiguous -- confirm against callers.
        for t in range(0, nt - 1):
            times[t] = str(t)
        return times

    def getSpecies(self):
        """Return a list of species.

        This isn't really a validator, but it shares a lot of the
        functionality.

        Returns:
           list of species, upper-cased, with the important ones (O3, NO,
           NO2) first and TFLAG excluded
        """
        vars = sorted(v.upper() for v in self.conc.variables.keys())

        pri_vars = []
        normal_vars = []
        # Put some of the important ones up top
        for var in vars:
            if var in ('O3', 'NO', 'NO2'):
                pri_vars.append(var)
            elif var == 'TFLAG':
                continue
            else:
                normal_vars.append(var)

        return pri_vars + normal_vars

    # Check to ensure all the chosen species are available
    # Species is a string vector
    def validateSpecies(self, species):
        """Validate species against a sample datafile variables.

        Keyword Arguments:

        species -- Vector of species names to use

        Raises:
           ValidationError - if invalid species is input

        Returns:
           TRUE if valid
        """
        vars = [v.upper() for v in self.conc.variables.keys()]
        notFound = [s for s in species if s not in vars]
        if len(notFound) > 0:
            # (the unreachable "return False" that followed this raise in
            # the original has been removed)
            raise ValidationError("Invalid species: ",
                                  '[%s]' % ', '.join(map(str, notFound)))
        return True

    def validateLayers(self, layers):
        """Validate layers against a sample datafile file.

        Keyword Arguments:

        layers -- Vector of layers to use

        Raises:
           ValidationError - if invalid layer is input

        Returns:
           TRUE if valid
        """
        num_layers = self.conc.dimensions['LAY']
        for l in layers:
            if l > num_layers:
                raise ValidationError("Invalid layer: ", l)
        return True

    def validateTimes(self, times):
        """Validate times against a sample datafile file.

        Keyword Arguments:

        times -- Vector of times to use

        Raises:
           ValidationError -- if invalid times step is input

        Returns:
           TRUE if valid
        """
        # Not yet implemented
        return True

    def ValidateDataFileSurface(self, filename):
        """Validates a datafile by checking if its 2D surface domain (ni, nj)
        matches the sample file."""
        datafile = DataFile(filename, mode='r', open=True)
        try:
            return (datafile.dimensions['COL'] == self.ni
                    and datafile.dimensions['ROW'] == self.nj)
        finally:
            # BUG FIX: the file was previously never closed (resource leak)
            datafile.close()