def test_na1001CurlyWithCurlyBraces(self):
    """Tests an input file with curly braces.

    Reads ``1001_cb.na``, writes its NA dictionary back out to
    ``test_1001_cb_rewritten.na`` and checks that the writer object is an
    ``NAFile`` instance.
    """
    cb_file = os.path.join(data_files, "1001_cb.na")
    fin = nappy.openNAFile(cb_file)
    fin.readData()
    na_dict = fin.getNADict()

    foutname = os.path.join(test_outputs, "test_1001_cb_rewritten.na")
    fobj = nappy.openNAFile(foutname, mode="w", na_dict=na_dict)
    fobj.write()
    # failUnless() is a deprecated alias that was removed from unittest in
    # Python 3.12; assertTrue() is the supported spelling on every version.
    self.assertTrue(isinstance(fobj, nappy.na_file.na_file.NAFile))
def __init__(self, fname):
    """
    Open the NASA Ames file at ``fname`` and keep the handle on the instance.

    :param fname: Filesystem path to the NASA Ames file
    """
    self.fname = fname
    handle = nappy.openNAFile(self.fname)
    self.na = handle
    # Nothing beyond what openNAFile() does has been loaded at this point.
    self._data_in_memory = False
def setUp(self):
    """Locate the 1010 fixture, name the output files and load the NA dict."""
    def in_outputs(leaf):
        # Every output of this test case lives directly under test_outputs.
        return os.path.join(test_outputs, leaf)

    self.infile = os.path.join(data_files, "1010.na")
    self.outfile = in_outputs("test_1010.na")
    self.out_csv = in_outputs("test_1010.csv")
    self.out_csv_annotated = in_outputs("test_1010_annotated.csv")

    reader = nappy.openNAFile(self.infile)
    self.fin = reader
    reader.readData()
    self.na_dict = reader.getNADict()
def _writeNAFileSubsetsWithinSizeLimit(self, this_na_dict, file_name, delimiter, float_format, size_limit, annotation):
    """
    Write ``this_na_dict`` as a set of files holding at most ``size_limit``
    rows each.

    If a size limit is specified and FFI is 1001 we can chunk the output into
    different files in a NASA Ames compliant way.

    :param this_na_dict: NA dictionary to split; ``this_na_dict["V"]`` is the
        list of variable arrays and the length of the first one is the number
        of rows to distribute.
    :param file_name: Base output name. Its last three characters are replaced
        by ``-NNN.na`` where ``NNN`` is the 1-based volume number.
    :param delimiter: Passed through to ``write()``.
    :param float_format: Passed through to ``write()``.
    :param size_limit: Maximum number of rows per output file (must be >= 1).
    :param annotation: Passed through to ``write()``.
    :raises ValueError: if ``size_limit`` is smaller than 1.

    Returns list of file names of outputs written.
    """
    if size_limit < 1:
        # A zero limit would divide by zero and a negative one would never
        # advance `start`, so reject both up front.
        raise ValueError("size_limit must be a positive integer, got %r" % (size_limit,))

    file_names = []
    var_list = this_na_dict["V"]
    array_length = len(var_list[0])

    # Number of volumes (files) that need to be written: ceil(length / limit).
    nvol, remainder = divmod(array_length, size_limit)
    if remainder > 0:
        nvol = nvol + 1

    start = 0
    ivol = 0

    # Loop through until full array length has been written to a set of files.
    while start < array_length:
        ivol = ivol + 1
        end = min(start + size_limit, array_length)

        # New V array holding only rows start..end of every variable
        current_block = [v[start:end] for v in var_list]

        # Adjust X accordingly in the na dictionary, because independent variable has been reduced in size
        na_dict_copy = nappy.utils.common_utils.modifyNADictCopy(this_na_dict, current_block,
                                                                 start, end, ivol, nvol)

        # Append the volume number to the file name for writing this block to
        file_name_plus_letter = "%s-%.3d.na" % (file_name[:-3], ivol)

        # Write data to output file; always release the handle, even on error
        x = nappy.openNAFile(file_name_plus_letter, 'w', na_dict_copy)
        try:
            x.write(delimiter=delimiter, float_format=float_format, annotation=annotation)
        finally:
            x.close()

        msg = "\nOutput files split on size limit: %s\nFilename used: %s" % (size_limit, file_name_plus_letter)
        if DEBUG:
            log.debug(msg)
        self.output_message.append(msg)

        start = end
        file_names.append(file_name_plus_letter)

    return file_names
def phenomena(self):
    """
    Build the list of parameters describing the variables of the NASA Ames
    file at ``self.file_path``.

    Every entry returned by ``getVariables()`` whose second element (stored
    here under ``"units"``) passes ``util.is_valid_phen_attr`` becomes one
    ``util.Parameter`` keyed by its first element (the name). Entries that
    share a name collapse to the last one seen.

    :returns: list of ``util.Parameter`` objects, or ``None`` when the file
        cannot be opened or read (deliberate best-effort behaviour).
    """
    try:
        na_fhandle = nappy.openNAFile(self.file_path)
        try:
            variables = {}
            for var in na_fhandle.getVariables():
                if util.is_valid_phen_attr(var[1]):
                    variables[var[0]] = {"name": var[0], "units": var[1]}
        finally:
            # The handle is only needed while the variable list is read.
            na_fhandle.close()

        # dict.items() exists on Python 2 and 3. The former iteritems() call
        # raised AttributeError on Python 3, which the handler below turned
        # into a silent None for every file.
        return [util.Parameter(name, other_params=attrs)
                for (name, attrs) in variables.items()]
    except Exception:
        # Best effort: an unreadable file simply has no phenomena to report.
        return None
def save_na_file(self, filename, na_dict=None, float_format='%.2f'):
    """
    Write a NASA/Ames dictionary to a file.

    :param string filename:
        Name of the file to be written.
    :param dict na_dict:
        Optional - The NASA/Ames dictionary to be saved. When it is omitted
        (or empty) the dictionary of the currently opened file,
        ``self.f.na_dict``, is saved instead.
    :param string float_format:
        Optional - Format string applied to numbers on output. The default
        ``'%.2f'`` writes two decimal places.
    """
    dict_to_write = na_dict if na_dict else self.f.na_dict
    writer = nappy.openNAFile(filename, mode="w", na_dict=dict_to_write)
    writer.write(float_format=float_format)
    writer.close()
def _open_file(self, filename, perms):
    """
    Private method for opening NASA Ames file using Nappy API.

    Any file already open on this object is closed first. On success the
    handle is stored in ``self.f`` and the header fields are collected into
    ``self.file_metadata``.

    :param string filename:
        Name of NASA Ames file to open.
    :param char perms:
        Permissions used to open file. Options are ``w`` for write (overwrites
        data in file), ``a`` and ``r+`` for append, and ``r`` for read.
    :raises RuntimeError: re-raised after printing a message when Nappy
        reports the file as missing.
    """
    self.close()

    try:
        self.f = nappy.openNAFile(filename, mode=perms)
        self.filename = filename
        self.perms = perms

        dates = self.f.getFileDates()
        attr_dict = {}
        # getNormalComments must be *called*; storing the bound method itself
        # put a function object where the comment lines belong.
        attr_dict['Comments'] = self.f.getNormalComments()
        attr_dict['SpecialComments'] = self.f.getSpecialComments()
        attr_dict['Organisation'] = self.f.getOrganisation()
        attr_dict['CreationDate'] = dates[0]
        attr_dict['RevisionDate'] = dates[1]
        attr_dict['Originator'] = self.f.getOriginator()
        attr_dict['Mission'] = self.f.getMission()
        attr_dict['Source'] = self.f.getSource()

        self.file_metadata = egads.core.metadata.FileMetadata(attr_dict, self.filename,
                                                              conventions="NASAAmes")
    except RuntimeError:
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("ERROR: File %s doesn't exist" % (filename))
        # Bare raise keeps the original RuntimeError and its message.
        raise
    except Exception:
        print("ERROR: Unexpected error")
        raise
def convert_to_nasa_ames(self, na_file=None, requested_ffi=None, delimiter=' ', float_format='%g', size_limit=None, annotation=False, no_header=False):
    """
    Convert currently open NetCDF file to one or more NASA Ames files using
    Nappy.

    NOTE: in its current form this method writes an NA dictionary whose
    entries are all empty strings, and ``requested_ffi``, ``delimiter``,
    ``float_format``, ``size_limit``, ``annotation`` and ``no_header`` are
    accepted but not yet used by the body.

    :param string na_file:
        Optional - Name of output NASA Ames file. If none is provided, name of
        current NetCDF file is used and suffix changed to .na
    :param int requested_ffi:
        The NASA Ames File Format Index (FFI) you wish to write to. Options
        are limited depending on the data structures found.
    :param string delimiter:
        Optional - The delimiter desired for use between data items in the
        data file. Default - a single space.
    :param string float_format:
        Optional - The formatting string used for formatting floats when
        writing to output file. Default - %g
    :param int size_limit:
        Optional - If format FFI is 1001 then chop files into size_limit rows
        of data.
    :param bool annotation:
        Optional - If set to true, write the output file with an additional
        left-hand column describing the contents of each header line.
        Default - False.
    :param bool no_header:
        Optional - If set to true, then only the data blocks are written to
        file. Default - False.
    """
    import os

    # Placeholder NASA Ames dictionary: every header/data key starts empty.
    na_dict = {"A":"","AMISS":"","ANAME":"","ASCAL":"","DATE":"","DX":"",
               "FFI":"","IVOL":"","LENA":"","LENX":"","MNAME":"","NAUXC":"",
               "NAUXV":"","NCOM":"","NIV":"","NLHEAD":"","NNCOML":"",
               "NSCOML":"","NV":"","NVOL":"","NVPM":"","NX":"","NXDEF":"",
               "ONAME":"","ORG":"","RDATE":"","SCOM":"","SNAME":"","V":"",
               "VMISS":"","VNAME":"","VSCAL":"","X":"","XNAME":""}

    # NetCDF global attribute -> NASA Ames key mapping noted by the author:
    #   Conventions                  = None
    #   source                       = SNAME
    #   title                        = MNAME
    #   institution                  = ONAME&ORG
    #   references                   = None
    #   comment                      = SCOM&NCOM
    #   history                      = RDATE
    #   file_format_index            = FFI
    #   no_of_nasa_ames_header_lines = NLHEAD
    #   total_files_in_set           = NVOL
    #   file_number_in_set           = IVOL
    #   first_valid_date_of_data     = DATE

    if na_file is None:
        # Swap the suffix for ".na" as documented. Reusing self.filename
        # unchanged would open the current NetCDF file itself for writing.
        na_file = os.path.splitext(self.filename)[0] + ".na"

    self.f_out = nappy.openNAFile(na_file, mode="w", na_dict=na_dict)
    self.f_out.write()
    self.f_out.close()
# Import standard library modules
import unittest
import os
import sys

import nappy
import nappy.utils.compare_na

# Common test info
here = os.path.dirname(__file__)
example_files = os.path.join(here, '../../example_files')

# Common set up for these tests: the FFI 1001 example is read once at import
# time and its NA dictionary is shared by both tests below.
infile = os.path.join(example_files, "1001.na")
fin = nappy.openNAFile(infile)
fin.readData()
na_dict = fin.getNADict()


def test_read1001():
    "Tests reading FFI 1001."
    # isinstance() is the idiomatic type check and also accepts dict subclasses.
    assert isinstance(na_dict, dict)


def test_write1001(tmpdir):
    "Tests writing FFI 1001."
    outfile = os.path.join(tmpdir.strpath, "test_1001.na")
    fobj = nappy.openNAFile(outfile, mode="w", na_dict=na_dict)
    fobj.write()
    assert isinstance(fobj, nappy.na_file.na_file.NAFile)
# Load one model per line: read_model() is handed the list of files matching
# the glob pattern and its three return values are unpacked.
# NOTE(review): going by the target names these are presumably a time axis, an
# O3 series and a normalised O3 series -- confirm against read_model().
gemaq_time, gemaq_o3, norm_gemaq_o3 = read_model(glob.glob('model_files/GEMAQ*'))
geoschem_time, geoschem_o3, norm_geoschem_o3 = read_model(glob.glob('model_files/GEOSChem*'))
giss_time, giss_o3, norm_giss_o3 = read_model(glob.glob('model_files/GISS-PUCCINI-modelEaer_SR1_sfc*'))
giss_alt_time, giss_alt_o3, norm_giss_alt_o3 = read_model(glob.glob('model_files/GISS_PUCCINI_modelE_alt_SR1*'))
inca_time, inca_o3, norm_inca_o3 = read_model(glob.glob('model_files/INCA*'))
llnl_time, llnl_o3, norm_llnl_o3 = read_model(glob.glob('model_files/LLNL*'))
mozart_time, mozart_o3, norm_mozart_o3 = read_model(glob.glob('model_files/MOZARTGFDL*'))
mozech_time, mozech_o3, norm_mozech_o3 = read_model(glob.glob('model_files/MOZECH*'))
oslo_time, oslo_o3, norm_oslo_o3 = read_model(glob.glob('model_files/OsloCTM2*'))
# TM5-JRC is currently disabled:
#tm5_time, tm5_o3, norm_tm5_o3 = read_model(glob.glob('model_files/TM5-JRC*'))

# Read in the observations from the NASA Ames file (header first, then data).
myfile=nappy.openNAFile('York_merge_Cape_verde_1hr_R1.na')
myfile.readData()

# Position of the ozone series in the file's list of variable names.
k_var1=myfile["VNAME"].index('Ozone mixing ratio (ppbV)_(Mean)')

# The file hands back plain lists, so convert them to numpy arrays.
# NOTE(review): `time` shadows the stdlib module name if that is imported here.
time=np.array(myfile['X'])
var1=np.array(myfile['V'][k_var1])

# Keep only strictly positive ozone values (and their matching times).
# NOTE(review): presumably this drops missing-value fills -- confirm.
valids1=var1 > 0
time=time[valids1]
var=var1[valids1]

# Of the remaining times keep those not larger than 730.
# NOTE(review): 730 looks like two years in days -- confirm the unit of X.
valids2= time <= 730
obs_time=time[valids2]
def read_station_data(infile, **kargs):
    '''
    Conversion of NASA Ames station files (nilu.no data sets) into masked
    numpy arrays.

    :param infile: path of the NASA Ames file to read.
    :keyword conversion: if True (default) each tracer column is divided by
        the air density at 25 degC and by the tracer's mass fraction,
        otherwise the raw column is returned.
    :keyword tracer: tuple of tracer names to extract; each must appear as a
        whitespace-separated field of the last normal comment line (the table
        header). Default ``('O3', )``.
    :keyword v: if True, print where each tracer was found. Default False.
    :returns: dict with key ``'time'`` plus one key per tracer. Every value is
        a masked array; an entry is masked where the row that is used as its
        flag is greater than zero.
    '''
    import nappy as nap  # Read and write NASA data files

    # Open file (read header only), then read actual data
    nas_file = nap.openNAFile(infile)
    nas_file.readData()
    # Fetch the NA dictionary once, before the file is closed, and reuse it.
    na_dict = nas_file.getNADict()
    data_raw = np.array(na_dict['V'])
    nas_file.close()

    # Keywords
    conversion = kargs.pop('conversion', True)
    tracer = kargs.pop('tracer', ('O3', ))
    verbose = kargs.pop('v', False)

    # Last normal comment line is the table header naming the columns.
    table_header = na_dict['NCOM'][-1]

    # Fewer (value, flag) column pairs than requested tracers: fall back to
    # the first requested tracer that the header mentions.
    if (data_raw.shape[0] - 1) / 2 < len(tracer):
        for each in tracer:
            if table_header.find(each) > 0:
                # Forward the caller's options; they used to be dropped here,
                # silently re-enabling the unit conversion.
                return (read_station_data(infile, tracer=(each, ),
                                          conversion=conversion, v=verbose))

    # Conversion = 1/air_dens(25degC)/mass_fraction
    # 1/2 for O3 (0.5)
    # ca. 0.38 for SO2
    # ca. 0.81 for NO
    M_air = 28.949  # [g/mol]
    air_dens_25 = 1.1839  # [kg/m3]
    mass_fraction = {
        'O3': 3 * 15.9994 / M_air,
        'SO2': 32.065 / M_air,  # ug(S)/m3
        'SO4': 32.065 / M_air,
        'NO': 14.0067 / M_air,  # ug(N)/m3
        'NO2': 14.0067 / M_air
    }

    # Build the reference date from its numeric parts. The previous string
    # template hard-coded a leading "0" for month and day, which strptime
    # rejects as soon as either has two digits (e.g. "2000-012-031").
    nas_date = na_dict['DATE']
    start_date = dt.datetime(int(nas_date[0]), int(nas_date[1]), int(nas_date[2]))

    # Filter the data (FLAG: 0 - valid, >0 - invalid)
    # Add day fraction at stop time to start date
    x_time_station = np.ma.masked_where(
        data_raw[2] > 0,
        datetime_from_time(start_date, data_raw[0])[0])
    data_dic = {'time': x_time_station}

    header_fields = table_header.split()
    if verbose:
        for each in tracer:
            position = header_fields.index(each)
            print(each, data_raw.shape, position, header_fields[position])

    for each in tracer:
        # Find column in which tracer appears: split table header and look
        # for it. The header has start/end time columns while the V array
        # only carries one time field, hence the offset of one.
        i = header_fields.index(each) - 1
        if conversion:
            data_dic[each] = np.ma.masked_where(
                data_raw[i + 1] > 0,
                data_raw[i] * 1 / air_dens_25 / mass_fraction[each])
        else:
            data_dic[each] = np.ma.masked_where(data_raw[i + 1] > 0,
                                                data_raw[i])
    return (data_dic)
def writeNAFiles(self, na_file=None, delimiter=default_delimiter, annotation=False,
                 float_format=default_float_format, size_limit=None, no_header=False):
    """
    Writes the self.na_dict_list content to one or more NASA Ames files.
    Output file names are based on the self.nc_file name unless specified
    in the na_file_name argument in which case that provides the main name
    that is appended to if multiple output file names are required.

    :param na_file: optional main output name, passed to
        ``constructNAFileNames``.
    :param delimiter: passed to the NA writer (and to the column-header
        update).
    :param annotation: passed to the NA writer.
    :param float_format: passed to the NA writer.
    :param size_limit: if given, FFI 1001 dictionaries whose first variable
        has more rows than this are split across several files.
    :param no_header: accepted but unused.
    :returns: ``self.output_message``, the list of messages accumulated while
        writing. The names of all files written are stored in
        ``self.output_files_written``.

    TODO: no_header is NOT implemented.
    """
    self.convert()  # just in case not already called

    # Gets a list of NA file_names that will be produced.
    file_names = self.constructNAFileNames(na_file)

    # file_counter is the 1-based index of the NA dictionary being written.
    # file_list collects every file actually written, including the extra
    # ones produced when a dictionary is split because size_limit is exceeded.
    file_counter = 1
    file_list = []

    # Get any NASA Ames dictionary values that should be overwritten with local values
    local_attributes = nappy.utils.getLocalAttributesConfigDict()
    local_na_atts = local_attributes["na_attributes"]

    # Define final override list by using defaults then locally provided
    # changes. Work on a copy so the configuration dictionary itself is not
    # modified by this instance's overrides.
    overriders = dict(local_na_atts)
    overriders.update(self.na_items_to_override)

    # Now loop through writing the outputs
    for na_dict_and_var_ids in self.na_dict_list:
        file_name = file_names[file_counter - 1]
        msg = "\nWriting output NASA Ames file: %s" % file_name
        if DEBUG:
            log.debug(msg)
        self.output_message.append(msg)

        # Set up current na dict
        (this_na_dict, vars_to_write) = na_dict_and_var_ids

        # Override content of NASA Ames if they are permitted
        for key in overriders.keys():
            if key not in permitted_overwrite_metadata:
                continue

            if key in items_as_lists:
                new_item = overriders[key].split()
                if key in ("DATE", "RDATE"):
                    new_item = [int(list_item) for list_item in new_item]
            else:
                new_item = overriders[key]

            # Do specific overwrite for comments by inserting lines at start
            if key in ("SCOM", "NCOM"):
                # Use rule defined in config file in terms of where to put new comments
                if comment_override_rule == "replace":
                    comments_list = new_item[:]
                elif comment_override_rule in ("insert", "extend"):
                    new_comments = new_item[:]
                    existing_comments = this_na_dict.get(key, [])
                    comments_list = self._cleanWrapComments(existing_comments, new_comments,
                                                            key, comment_override_rule)
                else:
                    raise Exception("Did not recognise comment_override_rule: " + str(comment_override_rule))

                this_na_dict[key] = comments_list
                # Keep the matching line-count entry (NSCOML / NNCOML) in step
                this_na_dict["N%sL" % key] = len(comments_list)

            # `in` replaces the Python-2-only dict.has_key()
            elif key not in this_na_dict or new_item != this_na_dict[key]:
                this_na_dict[key] = new_item
                msg = "Metadata overwritten in output file: '%s' is now '%s'" % (key, this_na_dict[key])
                if DEBUG:
                    log.debug(msg)
                self.output_message.append(msg)

        # For certain FFIs create final Normal comments as a list of column headers before data section
        if add_column_headers == True:
            self._updateWithColumnHeaders(this_na_dict, delimiter)

        # Cope with size limits if specified and FFI is 1001: a too-long array
        # is written in chunks to separate files.
        if size_limit is not None and (this_na_dict["FFI"] == 1001 and len(this_na_dict["V"][0]) > size_limit):
            files_written = self._writeNAFileSubsetsWithinSizeLimit(this_na_dict, file_name, delimiter=delimiter,
                                                                    float_format=float_format, size_limit=size_limit,
                                                                    annotation=annotation)
            file_list.extend(files_written)

        # If not having to split file into multiple outputs (normal condition)
        else:
            log.info("Output NA file name: %s" % file_name)
            x = nappy.openNAFile(file_name, 'w', this_na_dict)
            x.write(delimiter=delimiter, float_format=float_format, annotation=annotation)
            x.close()
            file_list.append(file_name)

        # Report on what has been written
        msg = "\nWrote the following variables:" + "\n " + ("\n ".join(vars_to_write[0]))
        if DEBUG:
            log.debug(msg)
        self.output_message.append(msg)

        msg = ""
        aux_var_count = vars_to_write[1]
        if len(aux_var_count) > 0:
            msg = "\nWrote the following auxiliary variables:" + "\n " + ("\n ".join(aux_var_count))

        singleton_var_count = vars_to_write[2]
        if len(singleton_var_count) > 0:
            # Append rather than assign, so the auxiliary-variable report
            # above is not lost when both kinds of variable are present.
            msg = msg + "\nWrote the following Singleton variables:" + "\n " + ("\n ".join(singleton_var_count))

        if len(file_list) > 0:
            msg = msg + ("\n\nNASA Ames file(s) written successfully: \n%s" % "\n".join(file_list))

        file_counter += 1

        if DEBUG:
            log.debug(msg)
        self.output_message.append(msg)

    # file_list is cumulative, so its final length is the number of files
    # written. (Adding len(file_list) on every pass counted early files
    # several times.)
    full_file_count = len(file_list)
    if full_file_count == 1:
        plural = ""
    else:
        plural = "s"
    msg = "\n%s file%s written." % (full_file_count, plural)

    if DEBUG:
        log.debug(msg)
    self.output_message.append(msg)
    self.output_files_written = file_list

    return self.output_message
# Standalone plot b_stand = True # Clean up if b_stand: plt.close('all') # Access environment variable for directory nas_data = os.environ['DATA'] # Data directory nas_subd = '/Ebas_Ozone' nas_src = '/Zeppelin_Mountain/NO0042G.20000101000000.20130101000000.uv_abs.ozone.air.1y.1h.NO01L_uv_abs_uk_0042.NO01L_uv_abs..nas' nas_src_3 = '/Neumeyer/DE0060G.20000101000000.20170201090710.uv_abs.ozone.air.1y.1h.DE06L_O3Neumayer2.DE06L_uv_ab.lev2.nas' # Open file (read header only) nas_file = nap.openNAFile(nas_data + nas_subd + nas_src_3) # Read actual data nas_file.readData() # Access the ozone data ozone_data_raw = np.array(nas_file.getNADict()['V']) # Filter the data (0 - valid, >0 - invalid) ozone_data = ozone_data_raw[:, np.where(ozone_data_raw[2] == 0)[0]] # Close the nas-file nas_file.close() station_name = nas_file.getNADict()['NCOM'][13][30:-14] air_dens = np.array( (1.4224, 1.1839)) # estimate for standard air density (-25 degC, 35 degC) M_O = 15.9994 # [g/mol] M_air = 28.949 # [g/mol] mass_fraction = (3 * M_O / M_air)
def plot():
    """
    For every species in ``species_list`` plot the smoothed Lomb-Scargle power
    of the model as a percentage of the smoothed power of the observations.

    The model output is read from ``GEOS_logs.npy`` via ``readfile`` and the
    observations from ``York_merge_Cape_verde_1hr_R1.na``. Both series are
    normalised (mean removed, divided by the standard deviation), passed to
    ``lomb.fasper``, smoothed in log space with ``savitzky_golay`` and divided.
    A second, "corrected" model series with a synthetic seasonal offset is
    computed as well; its curve is currently not drawn.

    Relies on module-level names: ``species_list``, ``readfile``, ``lomb``,
    ``savitzky_golay``, ``nappy``, ``np``, ``plt``, ``datetime`` and
    ``FormatStrFormatter``. Shows the figure; returns nothing.
    """
    # Readin the model output. `names` is local to this function, so the
    # model file is always (re)read here.
    model, names = readfile("GEOS_logs.npy", "001")  # 001 represents CVO

    # Processes the date: column 0 is YYYYMMDD, column 1 is HHMM
    year = (model[:, 0] // 10000)
    month = ((model[:, 0] - year * 10000) // 100)
    day = (model[:, 0] - year * 10000 - month * 100)
    hour = model[:, 1] // 100
    minute = (model[:, 1] - hour * 100)

    reference = datetime.datetime(2006, 1, 1, 0, 0, 0)
    # int() replaces the np.int alias, which newer NumPy releases removed.
    doy = [datetime.datetime(int(year[i]), int(month[i]), int(day[i]),
                             int(hour[i]), int(minute[i]), 0) - reference
           for i in range(len(year))]
    # Model time axis in fractional days since 2006-01-01, as a numpy array
    since2006 = np.array([d.days + d.seconds / (24. * 60. * 60.) for d in doy])

    # now read in the observations
    myfile = nappy.openNAFile('York_merge_Cape_verde_1hr_R1.na')
    myfile.readData()

    counter = 0
    fig = plt.figure(figsize=(20, 12))
    fig.patch.set_facecolor('white')
    ax = plt.subplot(111)

    for species in species_list:
        # Gives species exact model tags for convenience
        print(species)
        if species == 'ISOPRENE':
            species = 'TRA_6'
        elif species == 'ACETONE':
            species = 'ACET'
        elif species == 'TEMP':
            species = 'GMAO_TEMP'
        elif species == 'SURFACE_PRES':
            species = 'GMAO_PSFC'
        elif species == 'WINDSPEED':
            species = 'GMAO_WIND'
        elif species == 'SURFACE_SOLAR_RADIATION':
            species = 'GMAO_RADSW'
        elif species == 'ABS_HUMIDITY':
            species = 'GMAO_ABSH'
        elif species == 'REL_HUMIDITY':
            species = 'GMAO_RHUM'

        model_cut_switch = 0  # 1: model series is derived by read_diff_species()
        obs_switch = 0        # 1: observations are degC and get +273.15
        ofac = 1              # third argument handed to lomb.fasper

        if species == 'O3':
            print('yes')
            units = 'ppbV'
            first_label_pos = 3
            obs_data_name = 'Ozone mixing ratio (ppbV)_(Mean)'
            unit_cut = 1e9
            species_type = 'Conc.'
            actual_species_name = 'O3'

        elif species == 'CO':
            units = 'ppbV'
            first_label_pos = 1
            obs_data_name = 'CO mixing ratio (ppbV)_(Mean)'
            unit_cut = 1e9
            species_type = 'Conc.'
            actual_species_name = 'CO'
            ofac = 2.0001

        elif species == 'NO':
            units = 'pptV'
            first_label_pos = 1
            obs_data_name = 'NO mixing ratio (pptv)_(Mean)'
            unit_cut = 1e12
            species_type = 'Conc.'
            actual_species_name = 'NO'

        elif species == 'NO2':
            units = 'pptV'
            first_label_pos = 1
            obs_data_name = 'NO2 mixing ratio (pptv)_(Mean)'
            unit_cut = 1e12
            species_type = 'Conc.'
            actual_species_name = 'NO2'

        elif species == 'C2H6':
            units = 'pptV'
            first_label_pos = 1
            obs_data_name = 'ethane mixing ratio (pptV)_(Mean)'
            unit_cut = 1e12
            species_type = 'Conc.'
            actual_species_name = 'C2H6'

        elif species == 'C3H8':
            units = 'pptV'
            first_label_pos = 1
            obs_data_name = 'propane mixing ratio (pptV)_(Mean)'
            unit_cut = 1e12
            species_type = 'Conc.'
            actual_species_name = 'C3H8'

        elif species == 'DMS':
            units = 'pptV'
            first_label_pos = 1
            obs_data_name = 'dms mixing ratio (pptV)_(Mean)'
            unit_cut = 1e12
            species_type = 'Conc.'
            actual_species_name = 'DMS'

        elif species == 'TRA_6':  # Isoprene
            units = 'pptV'
            first_label_pos = 1
            obs_data_name = 'Isoprene (pptv)_(Mean)'
            unit_cut = 1e12
            species_type = 'Conc.'
            actual_species_name = 'Isoprene'

        elif species == 'ACET':
            units = 'pptV'
            first_label_pos = 1
            obs_data_name = 'acetone mixing ratio (pptV)_(Mean)'
            unit_cut = 1e12
            species_type = 'Conc.'
            actual_species_name = 'Acetone'

        elif species == 'GMAO_TEMP':  # Temp from met fields
            units = 'K'
            first_label_pos = 3
            obs_data_name = 'Air Temperature (degC) Campbell_(Mean)'
            unit_cut = 1
            species_type = 'Temp.'
            actual_species_name = 'Surface Temperature'
            obs_switch = 1

        elif species == 'GMAO_PSFC':  # Surface Pressure
            units = 'hPa'
            first_label_pos = 3
            obs_data_name = 'Atmospheric Pressure (hPa) Campbell_(Mean)'
            unit_cut = 1
            species_type = 'Pres.'
            actual_species_name = 'Surface Pressure'

        elif species == 'GMAO_WIND':  # Wind Speed extrapolated from UWND and VWND
            def read_diff_species():
                k = names.index('GMAO_UWND')
                i = names.index('GMAO_VWND')
                model_cut = np.sqrt((model[:, k]**2) + (model[:, i]**2))
                return model_cut
            units = r'$ms^{-1}$'
            first_label_pos = 3
            obs_data_name = 'Wind Speed (m/s) Campbell_(Mean)'
            unit_cut = 1
            species_type = 'Wind Speed'
            model_cut_switch = 1
            actual_species_name = 'Surface Windspeed'

        elif species == 'GMAO_RADSW':  # Sensible heat flux form surface
            units = r'$Wm^{-2}$'
            first_label_pos = 3
            obs_data_name = 'Solar Radiation (Wm-2) Campbell_(Mean)'
            unit_cut = 1
            species_type = 'Solar Radiation'
            actual_species_name = 'Surface Solar Radiation'

        elif species == 'GMAO_ABSH':  # Absolute Humidity
            units = 'molec/cm-3'
            first_label_pos = 3
            # NOTE(review): no observation column is named for this species;
            # the VNAME lookup below will fail unless the file has an
            # empty-named variable.
            obs_data_name = ''
            unit_cut = 1
            species_type = 'Absolute Humidity'
            actual_species_name = 'Absolute Humidity'

        elif species == 'GMAO_RHUM':  # Relative Humidity
            units = '%'
            first_label_pos = 3
            obs_data_name = 'Relative Humidity (%) Campbell_(Mean)'
            unit_cut = 1
            species_type = 'Relative Humidity'
            actual_species_name = 'Relative Humidity'

        k_var1 = myfile["VNAME"].index(obs_data_name)

        # OK need to conver values from a list to a numpy array
        time = np.array(myfile['X'])
        if obs_switch == 0:
            var1 = np.array(myfile['V'][k_var1])
        elif obs_switch == 1:
            var1 = np.array(myfile['V'][k_var1]) + 273.15
        valids1 = var1 > 0
        time2 = time[valids1]
        var2 = var1[valids1]

        # Pre normalise obs data for lomb analysis
        standard_deviation_obs_p = np.std(var2)
        mean_obs_p = np.mean(var2)
        normal_var2 = var2 - mean_obs_p
        normal_var2 = normal_var2 / standard_deviation_obs_p

        # Need to normalise model data also
        if model_cut_switch == 0:
            k = names.index(species)
            print(model[:, k])
            model_cut = model[:, k] * unit_cut
        if model_cut_switch == 1:
            model_cut = read_diff_species()

        # Add a synthetic seasonal trend to the model series (labelled in the
        # original as an ethane emission trend; applied to every species).
        # The yearly profile ramps 0 -> 100 -> 0 -> -100 -> 0 over four
        # seasons of 91, 91, 91 and 92 days.
        first_season = np.linspace(0, 100, num=91, endpoint=True)
        second_season = first_season[::-1]
        third_season = np.linspace(0, -100, num=91, endpoint=True)
        fourth_season = np.append(third_season[::-1], 0)
        year_profile = np.concatenate((first_season, second_season,
                                       third_season, fourth_season))
        days_per_year = len(year_profile)  # 365
        step = 24                          # hourly values per day
        # Six profile years plus one further day that restarts the profile.
        total_days = 6 * days_per_year + 1

        new_model_cut = []
        for day_number in range(total_days):
            season_value = year_profile[day_number % days_per_year]
            first_hour = day_number * step
            sliced = model_cut[first_hour:first_hour + step]
            new_model_cut.extend([a + season_value for a in sliced])

        standard_deviation_model_p = np.std(model_cut)
        mean_model_p = np.mean(model_cut)
        normal_model = model_cut - mean_model_p
        normal_model = normal_model / standard_deviation_model_p

        standard_deviation_model_p_corrected = np.std(new_model_cut)
        mean_model_p_corrected = np.mean(new_model_cut)
        normal_model_corrected = new_model_cut - mean_model_p_corrected
        normal_model_corrected = normal_model_corrected / standard_deviation_model_p_corrected

        # Define sampling frequency
        samp_freq = 24

        # Lomb-scargle plot

        # Obs lomb
        fa, fb, nout, jmax, prob = lomb.fasper(time2, normal_var2, ofac, samp_freq)
        # Divide output by sampling frequency, smooth in log space
        fb = fb / samp_freq
        fb = np.log(fb)
        obs_smoothed = savitzky_golay(fb, window_size=301, order=1)
        obs_smoothed = np.exp(obs_smoothed)

        # Model lomb
        fx, fy, nout, jmax, prob2 = lomb.fasper(since2006, normal_model, ofac, samp_freq)
        # Divide output by sampling frequency, smooth in log space
        fy = fy / samp_freq
        fy = np.log(fy)
        model_smoothed = savitzky_golay(fy, window_size=301, order=1)
        model_smoothed = np.exp(model_smoothed)

        # Corrected model lomb
        fx, fy, nout, jmax, prob2 = lomb.fasper(since2006, normal_model_corrected, ofac, samp_freq)
        # Divide output by sampling frequency *before* taking the log; the
        # log used to be taken of the undivided power.
        fy_corrected = fy / samp_freq
        fy_corrected = np.log(fy_corrected)
        model_corrected_smoothed = savitzky_golay(fy_corrected, window_size=301, order=1)
        model_corrected_smoothed = np.exp(model_corrected_smoothed)

        obs_periods = 1. / fa
        model_periods = 1. / fx

        # Which dataset is shorter? Truncate the longer spectrum and use the
        # frequency/period axis of the shorter one. Equal lengths take the
        # first branch, where the slice is a no-op.
        if len(model_smoothed) > len(obs_smoothed):
            # model longer than obs
            model_smoothed = model_smoothed[:len(obs_smoothed)]
            model_corrected_smoothed = model_corrected_smoothed[:len(obs_smoothed)]
            freq_array = fa
            period_array = obs_periods
        else:
            # obs longer than (or as long as) model
            obs_smoothed = obs_smoothed[:len(model_smoothed)]
            freq_array = fx
            period_array = model_periods

        # calculate % of observations
        compare_powers = model_smoothed / obs_smoothed
        compare_powers = compare_powers * 100

        corrected_compare_powers = model_corrected_smoothed / obs_smoothed
        corrected_compare_powers = corrected_compare_powers * 100

        ax.set_xscale('log', basex=10)
        ax.set_yscale('log', basey=10)

        plt.plot(period_array, compare_powers, color='black', marker='x',
                 alpha=0.75, markersize=2, label=species)

        plt.grid(True)
        ax.xaxis.set_major_formatter(FormatStrFormatter('%.i'))
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.i'))
        leg = plt.legend(loc=4, prop={'size': 21})
        leg.get_frame().set_alpha(0.4)
        plt.xlim(0.05, 1e1)
        plt.ylim(0.001, 1e3)
        plt.xlabel('Period (Days)', fontsize=21)
        plt.ylabel('Percent of Obs. PSD (%)', fontsize=21)
        plt.title('% PSD of Model compared to Obs.', fontsize=21)
        counter += 1

    #plt.savefig('O3_capeverde_comparison_plots.ps', dpi = 200)
    plt.show()
def _read_single_file(ifile, firstday=None, lastday=None, time_offset=0):
    '''Read a single GAW WDCRG file.

    :param ifile: path of the NASA Ames file to read.
    :param firstday: if given, keep only rows with ``ISO8601 >= firstday``.
    :param lastday: if given, keep only rows with ``ISO8601 < lastday``.
    :param time_offset: minutes added to every observation time stamp.
    :returns: ``pandas.DataFrame`` with columns ``ISO8601``, ``lat``, ``lon``,
        ``original_station_name``, ``obstype``, ``unit`` and ``value``, or
        ``None`` when a required entry is missing or no ozone / nitrogen
        dioxide column with a recognised unit is found.
    '''
    import nappy
    log = logging.getLogger(__name__)
    log.info('Reading {}'.format(ifile))
    ds = nappy.openNAFile(ifile)
    ds.readData()
    keys = ds.getNADict().keys()

    # check for all required entries
    required_entries = (
        ('DATE', 'Cannot get reference time - skip entry: {}'),
        ('VNAME', 'Cannot get variable names - skip entry: {}'),
        ('X', 'Cannot get dates - skip entry: {}'),
        ('V', 'Cannot get values - skip entry: {}'),
        ('NCOM', 'Cannot get comments - skip entry: {}'),
    )
    for entry, message in required_entries:
        if entry not in keys:
            log.warning(message.format(ifile))
            return None

    idf = pd.DataFrame()

    # reference date
    refdate_list = ds['DATE']
    refdate = dt.datetime(refdate_list[0], refdate_list[1], refdate_list[2])

    # parse start dates, round to nearest hour
    offset = dt.timedelta(minutes=time_offset)
    start = [
        round_to_nearest_hour(refdate + dt.timedelta(days=i))
        for i in ds['X']
    ]
    vnames = ds['VNAME']

    # parse end dates, round to nearest hour
    if 'end_time of measurement' in vnames[0]:
        end = [
            round_to_nearest_hour(refdate + dt.timedelta(days=i))
            for i in ds['V'][0]
        ]
    else:
        end = start

    # Observation is middle of time stamp
    idf['ISO8601'] = [
        refdate + ((i - refdate) + (j - refdate)) / 2 + offset
        for i, j in zip(start, end)
    ]
    nobs = idf.shape[0]

    # get station information from "<label>: <value>" comment lines
    station_name = 'unknown'
    station_lat = np.nan
    station_lon = np.nan
    for c in ds['NCOM']:
        if 'Station name' in c:
            station_name = c.split(':')[1].replace(' ', '')
        # float() replaces np.float, an alias removed in NumPy 1.24.
        if 'Station latitude' in c:
            station_lat = float(c.split(':')[1].replace(' ', ''))
        if 'Station longitude' in c:
            station_lon = float(c.split(':')[1].replace(' ', ''))
    if station_name == 'unknown':
        log.warning('Unknown station name for file {}'.format(ifile))
    if np.isnan(station_lat):
        log.warning('Unknown station latitude for file {}'.format(ifile))
    if np.isnan(station_lon):
        log.warning('Unknown station longitude for file {}'.format(ifile))
    idf['lat'] = [station_lat] * nobs
    idf['lon'] = [station_lon] * nobs
    idf['original_station_name'] = [station_name] * nobs

    # get observation type, unit, and values. This is currently hard-coded,
    # could probably be done better. A column is only accepted when BOTH its
    # species and its unit are recognised, so type, unit, scale factor and
    # column index always describe the same column. The last accepted column
    # wins.
    ocol = -1
    obstype = obsunit = scal = None
    for i, v in enumerate(vnames):
        # Skip standard deviation and flag columns
        if 'stddev' in v:
            continue
        if 'numflag' in v:
            continue

        # Species check
        vals = v.split(',')
        col_type = None
        if 'ozone' in vals[0]:
            col_type = 'o3'
        if 'nitrogen_dioxide' in vals[0]:
            col_type = 'no2'
        if col_type is None:
            continue

        # Unit check
        u = vals[1] if len(vals) > 1 else ''
        col_unit = None
        col_scal = None
        if 'nmol/mol' in u:
            col_unit = 'ppbv'
            col_scal = 1.0
        # NOTE(review): 'mmol/mol' is mapped to ppmv here -- confirm that
        # 'umol/mol' was not intended.
        if 'mmol/mol' in u:
            col_unit = 'ppmv'
            col_scal = 1.0
        if 'ug/m3' in u:
            col_unit = 'ugm-3'
            col_scal = 1.0
        if 'ug N/m3' in u:
            col_unit = 'ugm-3'
            # mass of N -> mass of the full molecule
            if col_type == 'no2':
                col_scal = 46. / 14.
            if col_type == 'no':
                col_scal = 30. / 14.

        if col_unit is None or col_scal is None:
            log.warning('Unrecognised unit in column "{}" of {}'.format(
                v, ifile))
            continue

        obstype, obsunit, scal, ocol = col_type, col_unit, col_scal, i

    if ocol < 0:
        log.warning(
            'Cannot find proper obstype - skip entry: {}'.format(ifile))
        return None
    log.debug('species, unit, scalefactor: {}, {}, {}'.format(
        obstype, obsunit, scal))
    log.debug('Will read concentration data from column: "{}"'.format(
        vnames[ocol]))
    obs = np.array(ds['V'][ocol]) * scal

    # Check for flags: any non-zero flag invalidates the observation
    if 'numflag' in vnames[-1]:
        flag = np.array(ds['V'][-1])
        obs[flag != 0.0] = np.nan
    idf['obstype'] = [obstype] * nobs
    idf['unit'] = [obsunit] * nobs
    idf['value'] = obs

    # Eventually reduce to specified time range
    if firstday is not None:
        log.info('Only use data after {}'.format(firstday))
        idf = idf.loc[idf['ISO8601'] >= firstday]
    if lastday is not None:
        log.info('Only use data before {}'.format(lastday))
        idf = idf.loc[idf['ISO8601'] < lastday]
    return idf