def load_DART_obs_epoch_file_as_dataframe(E, date=datetime.datetime(2009, 1, 1, 0, 0, 0),
                                          obs_type_list=('ERP_PM1', 'ERP_LOD'),
                                          ens_status_list=('ensemble member',),
                                          hostname='taurus', debug=False):
    """
    Read a DART obs epoch file and return the selected observations as a
    Pandas data frame.

    The file is located from the experiment dictionary E and the date via
    `es.find_paths`. Every observation whose type is in obs_type_list is
    combined with every copy whose ensemble status is in ens_status_list and
    whose diagnostic equals E['diagn']. The result has one row per
    (observation, copy) pair and is indexed by ObsIndex.

    This should eventually replace the subroutine load_DART_obs_epoch_file.

    INPUTS:
    E: an experiment dictionary; E['diagn'] is always read, and
        E['copystring'] is read when ens_status_list is None.
    date: the date for which to load the obs epoch file.
    obs_type_list: names of the observation types to select.
    ens_status_list: ensemble statuses to select (e.g. 'ensemble member',
        'ensemble mean', 'ensemble spread', 'Truth', 'Observation').
        If None, [E['copystring']] is used instead.
    hostname: computer name, passed on to es.find_paths.
    debug: if True, print progress information.

    OUTPUT:
    A pandas DataFrame with columns QualityControl, DARTQualityControl,
    Value, Latitude, Longitude, Level, Date and CopyName, or None if the
    file does not exist.
    """

    # find the file for this run
    # this requires running a subroutine called `find_paths`, stored in a module `experiment_datails`,
    # but written by each user -- it should take an experiment dictionary and the hostname
    # as input, and return the filepath that corresponds to the desired field, diagnostic, etc.
    # NOTE(review): unlike load_DART_obs_epoch_file, no file_type is passed here, so the
    # result depends on find_paths' default file type -- confirm this resolves to an obs epoch file.
    filename = es.find_paths(E, date, hostname=hostname)
    if not os.path.exists(filename):
        if debug:
            print("+++cannot find files that look like "+filename+' -- returning None')
        return None

    # load the file; the try/finally guarantees the file is closed even if a variable is missing
    f = Dataset(filename, 'r')
    try:
        CopyMetaData = f.variables['CopyMetaData'][:]
        observations = f.variables['observations'][:]
        copy = f.variables['copy'][:]
        obs_type = f.variables['obs_type'][:]
        location = f.variables['location'][:]
        ObsIndex = f.variables['ObsIndex'][:]
        qc = f.variables['qc'][:]

        # find the obs_type number corresponding to the desired observations
        obs_type_no_list = [get_obs_type_number(f, obs_type_string)
                            for obs_type_string in obs_type_list]
    finally:
        f.close()

    # expand "CopyMetaData" into lists that hold ensemble status and diagnostic for each copy
    diagn = []
    ens_status = []
    CMD = []
    for icopy in copy:
        # the raw metadata is a character array; decode it so that the substring tests below
        # compare text with text (testing a str against bytes raises TypeError in Python 3)
        temp = CopyMetaData[icopy-1, ].tobytes().decode('utf-8')
        CMD.append(temp.rstrip())

        if 'prior' in temp:
            diagn.append('Prior')
        if 'posterior' in temp:
            diagn.append('Posterior')
        if 'truth' in temp:
            diagn.append('Truth')
            ens_status.append('Truth')
        if 'observations' in temp:
            diagn.append('Observation')
            ens_status.append('Observation')
        if 'ensemble member' in temp:
            ens_status.append('ensemble member')
        if 'ensemble mean' in temp:
            ens_status.append('ensemble mean')
        if 'ensemble spread' in temp:
            ens_status.append('ensemble spread')
        if 'observation error variance' in temp:
            ens_status.append(None)
            diagn.append(None)

    # indices of all observations that have one of the requested obs types
    if debug:
        print('selecting the following obs type numbers')
        print(obs_type_no_list)
    iobs = []
    for OTN in obs_type_no_list:
        # np.where returns a tuple; its first entry holds the matching indices
        iobs.extend(np.where(obs_type == OTN)[0])

    # select the copies corresponding to the right ensemble status
    # (or just copystring if the list isn't given) and diagnostic
    if ens_status_list is None:
        ens_status_list = [E['copystring']]
    if debug:
        print(ens_status_list)
    iensstatus = []
    for ES in ens_status_list:
        iensstatus.extend(i for i, x in enumerate(ens_status) if x == ES)
    iensstatus.sort()  # this is the list of copies with the right ensemble status

    # we are interested in the copies that have both the right ensemble status and the right diagnostic
    sdiagn = set(i for i, x in enumerate(diagn) if x == E['diagn'])
    cc = [val for val in iensstatus if val in sdiagn]
    if debug:
        print('these are the copies that suit both the requested ensemble status and the requested diagnostic:')
        print(cc)

    # given the above copy numbers, find the names that suit them
    copynames = [CMD[ii] for ii in cc]

    # now select the observations corresponding to the selected copies and obs types
    # (an explicit integer dtype keeps the indexing valid when nothing was selected)
    i1 = np.array(iobs, dtype=int)
    i2 = np.array(cc, dtype=int)
    ncopies = len(i2)
    L = len(i1)*ncopies  # length of the data vector
    obs_select = observations[i1[:, None], i2]

    # flatten to vectors in (observation, copy) order; the quantities that are only defined
    # per observation are repeated once for each selected copy
    obs_out = np.reshape(obs_select, L)
    lon_out = np.repeat(location[i1, 0], ncopies)
    lat_out = np.repeat(location[i1, 1], ncopies)
    lev_out = np.repeat(location[i1, 2], ncopies)
    qc1_out = np.repeat(qc[i1, 0], ncopies)
    qc2_out = np.repeat(qc[i1, 1], ncopies)
    ObsIndex_out = np.repeat(ObsIndex[i1], ncopies)
    date_out = np.repeat(date, L)

    # for each of the selected obs, report the copy name of each selected copy
    copynames_out = [cn for _ in range(len(i1)) for cn in copynames]

    # round the location values because otherwise pandas does not treat them as categories
    lat_out = np.round(lat_out, 1)
    lon_out = np.round(lon_out, 1)
    lev_out = np.round(lev_out)

    # return data frame
    data = {'QualityControl': qc1_out,
            'DARTQualityControl': qc2_out,
            'Value': obs_out,
            'Latitude': lat_out,
            'Longitude': lon_out,
            'Level': lev_out,
            'Date': date_out,
            'CopyName': copynames_out,
            }
    DF = pd.DataFrame(data, index=ObsIndex_out)

    return DF
def load_DART_obs_epoch_file(E, date_in=None, hostname='taurus', debug=False):
    """
    this function reads in an obs_epoch_XXX.nc file for a certain DART experiment,
    with the obs that we want given in E['obs_name'], and returns the desired observations.

    NOTE(review): a function with the same name is defined again further down in this
    file; in Python the later definition replaces this one when the module is imported.

    INPUTS:
    E: an experiment dictionary
        if E['copystring'] is a list of copystrings, we cycle through them.
        the string 'ensemble' in that list selects all copies whose ensemble status
        is "ensemble member"; a string of the form 'ensemble member N' selects member N;
        any other string selects the copies whose name contains it.
        in list mode only copies whose diagnostic equals E['diagn'] are kept.
        if E['obs_name'] is a list of observation types, we cycle through and load them all.
    date_in: the date on which we want to load the obs
        the default for this is None -- in this case, just choose the first entry of E['daterange']
    hostname: computer name - default is Taurus
    debug: debugging flag; default is False.

    OUTPUTS:
    obs_out, copy_names, obs_names_out, lons_list, lats_list, levs_list, QCdict
        obs_out: the selected observation values
        copy_names: the name(s) of the returned copies
        obs_names_out: the raw (undecoded) type name of each returned observation
        lons_list, lats_list, levs_list: location entries 0, 1, 2 of each returned observation
        QCdict: dictionary mapping each raw QC name to the QC values of the returned observations
    if the file cannot be found, the pair (None, None) is returned instead.
    """
    import re

    # select the date
    if date_in is None:
        date_in = E['daterange'][0]

    # find the file for this run
    # this requires running a subroutine called `find_paths`, stored in a module `experiment_datails`,
    # but written by each user -- it should take an experiment dictionary and the hostname
    # as input, and return the filepath that corresponds to the desired field, diagnostic, etc.
    filename = es.find_paths(E, date_in, hostname=hostname, file_type='obs_epoch', debug=debug)
    if not os.path.exists(filename):
        if debug:
            print("+++cannot find files that look like "+filename+' -- returning None')
        return None, None

    several_copies = isinstance(E['copystring'], list)

    # load the file; the try/finally guarantees the file is closed even if a read fails
    f = Dataset(filename, 'r')
    try:
        if debug:
            print('Loading file '+filename)
        observations = f.variables['observations'][:]
        copy = f.variables['copy'][:]
        location = f.variables['location'][:]
        CopyMetaData = f.variables['CopyMetaData'][:]
        ObsTypesMetaData = f.variables['ObsTypesMetaData'][:]
        obs_type = f.variables['obs_type'][:]
        QCMetaData_array = f.variables['QCMetaData'][:]
        qc = f.variables['qc'][:]
        qc_copy = f.variables['qc_copy'][:]

        # find the obs_type number corresponding to the desired observations
        # note that the obs type strings could have weird spaces around them -- strip those off here
        if isinstance(E['obs_name'], list):
            obs_type_no_list = [get_obs_type_number(f, obs_type_string.rstrip())
                                for obs_type_string in E['obs_name']]
        else:
            obs_type_no_list = [get_obs_type_number(f, E['obs_name'].rstrip())]

        if not several_copies:
            # we only have one copy number to get -- cc tells us the number of it
            # note also the prior and posterior diagnostics are not available for everything,
            # i.e. observations themselves
            if 'observation' in E['copystring']:
                cc = get_copy(f, E['copystring'])
            else:
                cc = get_copy(f, E['diagn'].lower()+' '+E['copystring'])
    finally:
        f.close()

    if several_copies:
        # if we have to retrieve more than one copy,
        # expand "CopyMetaData" into lists that hold ensemble status and diagnostic
        diagn = []
        ens_status = []
        CMD = []
        for icopy in copy:
            # decode the raw characters so that the substring tests below compare text with text
            temp = CopyMetaData[icopy-1, ].tobytes().decode('utf-8')
            CMD.append(temp.rstrip())

            if 'prior' in temp:
                diagn.append('Prior')
            if 'posterior' in temp:
                diagn.append('Posterior')
            if 'truth' in temp:
                diagn.append('Truth')
                ens_status.append('Truth')
            # 'observation error variance' also contains 'observation': test it first so that
            # this copy is recorded exactly once and diagn/ens_status stay aligned with CMD
            if 'observation error variance' in temp:
                ens_status.append(None)
                diagn.append(None)
            elif 'observation' in temp:
                diagn.append('')
                ens_status.append('Observation')
            if 'ensemble member' in temp:
                ens_status.append('ensemble member')
            if 'ensemble mean' in temp:
                ens_status.append('ensemble mean')
            if 'ensemble spread' in temp:
                ens_status.append('ensemble spread')

    #------locations, quality control, and observation codes for requested obs types
    iobs2 = []
    obs_codes_list = []
    lons_list = []
    lats_list = []
    levs_list = []

    # create a dictionary to hold all available Quality Control flags
    # (the keys are the raw bytes of each QC name, as before)
    QCMetaData = [QCMD.tobytes() for QCMD in QCMetaData_array]
    QCdict = {k: [] for k in QCMetaData}

    # loop over the requested obs types and store the traits of the obs that match
    if debug:
        print('this is the list of obs type numbers')
        print(obs_type_no_list)
    for OTN in obs_type_no_list:
        # observation numbers of all obs that fit this obs type; indexing with this 1-d array
        # (rather than squeezing the tuple from np.where) also works when exactly one obs matches
        idx = np.where(obs_type == OTN)[0]
        if debug:
            temp = ObsTypesMetaData[OTN-1, :].tobytes()
            print('these obs indices match obs of type '+temp.decode('UTF-8'))
            print(idx)
        iobs2.extend(idx)
        obs_codes_list.extend(obs_type[idx])
        lons_list.extend(location[idx, 0])
        lats_list.extend(location[idx, 1])
        levs_list.extend(location[idx, 2])

        # loop over all available QC flags and store them in the dict
        # note that DART QC copies start at 1, but python indices start at 0
        for iqc, qcname in zip(qc_copy, QCMetaData):
            QCdict[qcname].extend(qc[idx, iqc-1])

    if debug:
        print('retrieving '+str(len(iobs2))+' observations')

    # instead of obs number codes, return strings that identify the obs
    obs_names_out = [ObsTypesMetaData[obs_code-1].tobytes() for obs_code in obs_codes_list]

    #------observation values for requested copies of the requested observations
    if not several_copies:
        # in this case only a single copy, which is defined in E, is returned
        obs_out = observations[iobs2, cc]
        copy_names = E['diagn'].lower()+' '+E['copystring']
    else:
        # in this case several copies are returned
        iensstatus = []
        for CS in E['copystring']:
            if debug:
                print('looking for copy '+CS)

            if 'ensemble member ' in CS:
                # ensemble member names are stored weirdly in DART output (the member number is
                # padded with spaces), so match on the number with any amount of whitespace
                # NOTE(review): this replaces fixed spacing literals that could not be
                # recovered reliably from the source -- confirm against real copy names
                ensindex = int(re.sub(r'ensemble member*', '', CS).strip())
                member = re.compile(r'ensemble member\s+'+str(ensindex)+r'(?!\d)')
                indices = [i for i, x in enumerate(CMD) if member.search(x)]
            elif CS == 'ensemble':
                # in this case look for all the copies that have ensemble status = "ensemble member"
                indices = [i for i, x in enumerate(ens_status) if x == 'ensemble member']
            else:
                # for all other copystrings, just look for the CopyMetaData entries
                # that contain that copystring
                indices = [i for i, x in enumerate(CMD) if CS in x]

            if debug:
                print('here are the copy indices that fit this copystring')
                print(indices)
            iensstatus.extend(indices)
        iensstatus.sort()  # this is the list of copies with the right ensemble status

        # this is the list of copies with the right diagnostic
        idiagn = [i for i, x in enumerate(diagn) if x == E['diagn']]
        if debug:
            print('here are the copy indices that fit the requested diagnostic')
            print(idiagn)

        # we are interested in the indices that appear in both iensstatus and idiagn
        sdiagn = set(idiagn)
        jj = [val for val in iensstatus if val in sdiagn]
        if debug:
            print('here are the copy indices that fit both the requested copystrings and the requested diagnostic')
            print(jj)
            print('this corresponds to the following:')
            for j in jj:
                print(CMD[j])

        # now select the observations corresponding to these copies
        obs_out = observations[iobs2, :][:, jj]
        copy_names = [CMD[i] for i in jj]

    return obs_out, copy_names, obs_names_out, lons_list, lats_list, levs_list, QCdict
def load_DART_diagnostic_file(E, date=datetime.datetime(2009, 1, 1, 1, 0, 0), hostname='taurus', debug=False):
    """
    Read a DART diagnostic netcdf file for a single date and for the variable and
    experiment specified in the experiment dictionary E.

    NOTE(review): a function with the same name is defined again further down in this
    file; in Python the later definition replaces this one when the module is imported.

    INPUTS:
    E: an experiment dictionary; the entries 'exp_name', 'diagn', 'variable', 'copystring',
        'extras', 'latrange', 'lonrange' and (for 3d variables) 'levrange' are read.
    date: the date to load, passed on to es.find_paths.
    hostname: computer name, passed on to es.find_paths.
    debug: if True, print progress information.

    OUTPUTS:
    lev2, lat2, lon2, VVout, P0, hybm, hyam
        lev2, lat2, lon2: the parts of the coordinate arrays inside the requested ranges
            (lev2 is None for 2d variables)
        VVout: the requested field, after any computation requested in E['extras']
        P0, hybm, hyam: read from the file for 3d variables, None for 2d variables
            (for 'ztrop', P0 holds the reference pressure used in the height conversion)
    if a file cannot be found, seven None values are returned instead.

    RAISES:
    RuntimeError if neither the requested variable nor a known alternative name
    is present in the file.
    """

    def _read_copy_metadata(nc_file, expt):
        # names of the copies in nc_file; if the file does not store them,
        # look them up for that experiment
        if 'CopyMetaData' in nc_file.variables:
            raw = nc_file.variables['CopyMetaData'][:]
            return [raw[ii, ].tobytes().decode("utf-8").rstrip() for ii in range(len(raw))]
        return es.get_expt_CopyMetaData_state_space(expt)

    no_output = (None, None, None, None, None, None, None)

    # if debugging, print out what we're doing
    if debug:
        print('+++++++++++++++++++++++++++++++++++++++++')
        print("Retrieving experiment "+E['exp_name'])
        print("for diagnostic "+E['diagn'])
        print("variable "+E['variable'])
        # copystring may be a list, so convert it explicitly
        print("copy "+str(E['copystring']))
        if isinstance(date, str):
            datestr = date
        else:
            datestr = date.strftime("%Y-%m-%d")
        print("and date "+datestr)
        print('+++++++++++++++++++++++++++++++++++++++++')

    # retrieve the entries of the experiment dictionary, E:
    variable = E['variable']
    extras = E['extras']

    # a list of 2d variables -- if the var is 2d, don't need to load vertical levels
    # TODO: add other 2d variables to this list
    variables_2d = ['PS', 'ptrop', 'ztrop']
    is_2d = variable in variables_2d

    # find the file for this run
    # this requires running a subroutine called `find_paths`, stored in a module `experiment_datails`,
    # but written by each user -- it should take an experiment dictionary and the hostname
    # as input, and return the filepath that corresponds to the desired field, diagnostic, etc.
    filename = es.find_paths(E, date, 'diag', hostname=hostname, debug=debug)
    if not os.path.exists(filename):
        if debug:
            print("+++cannot find files that look like "+filename+' -- returning None')
        return no_output

    if debug:
        print('opening file '+filename)
    f = Dataset(filename, 'r')
    try:
        if is_2d:
            # don't need info about hybrid model levels if the variable is 2d
            lev = None
            P0 = None
            hybm = None
            hyam = None
        else:
            # TODO: add a check so that hybrid model level info is only loaded
            # for models with hybrid vertical levels
            lev = f.variables['lev'][:]
            P0 = f.variables['P0'][:]
            hybm = f.variables['hybm'][:]
            hyam = f.variables['hyam'][:]

        # load CopyMetaData if available
        CopyMetaData = _read_copy_metadata(f, E)

        # load the requested dynamical variable - these can have different names, so
        # first check for the requested variable, and if it's not found, try alternatives
        if variable in f.variables:
            varname_load = variable
        else:
            # here is a dictionary that holds alternative variable names to try
            possible_varnames_dict = {'T': ['t', 'var130'],
                                      'TS': ['t', 'var130'],
                                      'U': ['u', 'var131'],
                                      'US': ['u', 'var131'],
                                      'V': ['v', 'var132'],
                                      'VS': ['v', 'var132'],
                                      'Z': ['z', 'var129'],
                                      'geopotential': ['z', 'var129'],
                                      'GPH': ['Z', 'z', 'var129'],
                                      'var129': ['Z', 'z', 'var129'],
                                      'msl': ['var151'],
                                      'mslp': ['var151'],
                                      'ztrop': ['ptrop'],
                                      'Nsq': ['brunt']}
            varname_load = None
            # if several alternatives are present, the last one in the list is used
            for varname in possible_varnames_dict.get(variable, []):
                if varname in f.variables:
                    varname_load = varname
            # if the desired variable is still not found, throw an error and abort
            if varname_load is None:
                raise RuntimeError('Unable to find variable '+variable+' in file '+filename)

        prefac = 1.0  # a prefactor that can be changed for loading some variables

        # now actually load the variable, and replace its bad values with NaNs
        V = f.variables[varname_load]
        VV = prefac*V[:]
        if hasattr(V, '_FillValue'):
            VV[VV == V._FillValue] = np.nan

        # 'US' and 'VS' use the alternative latitude / longitude arrays 'slat' / 'slon'
        if variable == 'US':
            lat = f.variables['slat'][:]
        else:
            lat = f.variables['lat'][:]
        if variable == 'VS':
            lon = f.variables['slon'][:]
        else:
            lon = f.variables['lon'][:]

        #------finding which copies to retrieve
        if not isinstance(E['copystring'], list):
            copies = None
            # if the diagnostic is the Truth, then the copy string can only be one thing
            if E['diagn'] == 'Truth':
                copies = get_copy(f, CopyMetaData, 'true state')
            # if we want the ensemble variance or std, copystring has to be the ensemble spread
            if extras in ('ensemble variance', 'ensemble variance scaled', 'ensemble std'):
                copies = get_copy(f, CopyMetaData, 'ensemble spread')
            # if requesting the entire ensemble, loop over ensemble members here
            if E['copystring'] == 'ensemble':
                copies = [get_copy(f, CopyMetaData, cs) for cs in CopyMetaData if 'ensemble member' in cs]
            # if none of the above apply, just choose whatever is in copystring
            if copies is None:
                copies = [get_copy(f, CopyMetaData, E['copystring'], debug=debug)]
        else:
            # if E['copystring'] is a list, loop through it and collect the copies to load
            copies = []
            for CS in E['copystring']:
                if CS == 'ensemble':
                    # in this case look for all the copies that are ensemble members
                    copies.extend(get_copy(f, CopyMetaData, cs)
                                  for cs in CopyMetaData if 'ensemble member' in cs)
                else:
                    # for all other copystrings, look for the CopyMetaData entries equal to that copystring
                    copies.extend(get_copy(f, CopyMetaData, cstring)
                                  for cstring in CopyMetaData if cstring == CS)

        # figure out which vertical level range we want
        if is_2d:
            lev2 = None
        else:
            levrange = E['levrange']
            k1 = (np.abs(lev-levrange[1])).argmin()
            k2 = (np.abs(lev-levrange[0])).argmin()
            lev2 = lev[k1:k2+1]

        # figure out which latitude range we want
        latrange = E['latrange']
        j2 = (np.abs(lat-latrange[1])).argmin()
        j1 = (np.abs(lat-latrange[0])).argmin()
        lat2 = lat[j1:j2+1]

        # figure out which longitude range we want
        lonrange = E['lonrange']
        i2 = (np.abs(lon-lonrange[1])).argmin()
        i1 = (np.abs(lon-lonrange[0])).argmin()
        lon2 = lon[i1:i2+1]

        # select the first time and the requested copies, lat, lon (and lev) ranges
        if is_2d:
            VV2 = VV[0, copies, j1:j2+1, i1:i2+1]
        else:
            VV2 = VV[0, copies, j1:j2+1, i1:i2+1, k1:k2+1]

        #------------extra computations
        # if tropopause altitude (ztrop) was requested, we retrieved tropopause pressure --
        # convert it here
        if variable == 'ztrop':
            H = 7.0  # 7 km scale height
            if np.max(VV2) > 1000.0:  # in this case, pressure is in Pa
                P0 = 1.0E5
            else:
                P0 = 1.0E3
            VVpress = VV2
            VV2 = H*np.log(P0/VVpress)

        if extras == 'ensemble variance':
            # if the ensemble variance was requested, square the ensemble spread
            VVout = np.square(VV2)
        elif extras == 'ensemble variance scaled':
            # square the ensemble spread and scale by ensemble size
            if debug:
                print('squaring and scaling ensemble spread to get scaled variance')
            N = get_ensemble_size(f)
            fac = float(N+1)/N
            VVout = fac*np.square(VV2)
        else:
            VVout = VV2
    finally:
        # close the primary file, also when something above failed
        f.close()

    # if requesting the mean square error (MSE), load the corresponding truth run
    # and subtract it out, then square
    if extras == 'MSE':
        Etr = E.copy()
        Etr['diagn'] = 'Truth'
        filename_truth = es.find_paths(Etr, date, 'truth', hostname=hostname, debug=debug)
        if not os.path.exists(filename_truth):
            print("+++cannot find files that look like "+filename_truth+' -- returning None')
            return no_output
        if debug:
            print('opening file '+filename_truth)

        # open the truth file and load the field
        ft = Dataset(filename_truth, 'r')
        try:
            VT = ft.variables[variable]
            # select the true state as the right copy
            # NOTE(review): get_copy is called here the same way as for the primary file
            # (file, copy names, copy string) -- confirm against get_copy's signature
            copyt = get_copy(ft, _read_copy_metadata(ft, Etr), 'true state', debug=debug)
            if is_2d:
                VT2 = VT[0, copyt, j1:j2+1, i1:i2+1]
            else:
                VT2 = VT[0, copyt, j1:j2+1, i1:i2+1, k1:k2+1]
        finally:
            ft.close()

        # compute the square error
        VVout = np.square(VV2-VT2)

    return lev2, lat2, lon2, VVout, P0, hybm, hyam
def load_covariance_file(E, date, hostname='taurus', debug=False):
    """
    this subroutine loads in a pre-computed file of state-to-observation covariances and correlations.
    the state variable is given by E['variable']
    the observation is given by E['obs_name']

    INPUTS:
    E: an experiment dictionary; 'variable', 'latrange', 'lonrange' and
        (unless the variable is 'PS') 'levrange' are read here.
    date: the date to load, passed on to es.find_paths.
    hostname: computer name, passed on to es.find_paths.
    debug: if True, print a message when the file cannot be found.

    OUTPUTS:
    lev2, lat2, lon2, C, R
        lev2, lat2, lon2: the parts of the coordinate arrays inside the requested ranges
            (lev2 is None when the variable is 'PS')
        C, R: the covariance and correlation grids over those ranges
    if the file cannot be found, five None values are returned instead.
    """

    # find the file for this run
    # this requires running a subroutine called `find_paths`, stored in a module `experiment_datails`,
    # but written by each user -- it should take an experiment dictionary and the hostname
    # as input, and return the filepath that corresponds to the desired field, diagnostic, etc.
    filename = es.find_paths(E, date, file_type='covariance', hostname=hostname)
    if not os.path.exists(filename):
        if debug:
            print("+++cannot find files that look like "+filename+' -- returning None')
        return None, None, None, None, None

    has_levels = E['variable'] != 'PS'

    # load the netcdf file; the try/finally guarantees the file is closed even if a variable is missing
    f = Dataset(filename, 'r')
    try:
        lat = f.variables['lat'][:]
        lon = f.variables['lon'][:]
        if has_levels:
            lev = f.variables['lev'][:]
        Correlation = f.variables['Correlation'][:]
        Covariance = f.variables['Covariance'][:]
    finally:
        f.close()

    # figure out which latitude range we want
    latrange = E['latrange']
    j2 = (np.abs(lat-latrange[1])).argmin()
    j1 = (np.abs(lat-latrange[0])).argmin()
    lat2 = lat[j1:j2+1]

    # figure out which longitude range we want
    lonrange = E['lonrange']
    i2 = (np.abs(lon-lonrange[1])).argmin()
    i1 = (np.abs(lon-lonrange[0])).argmin()
    lon2 = lon[i1:i2+1]

    # select the right level, lat, and lon ranges, and squeeze out the last dimension
    # (the time dimension -- for now; might make this longer than 1 later)
    if has_levels:
        # figure out which vertical level range we want
        levrange = E['levrange']
        k1 = (np.abs(lev-levrange[1])).argmin()
        k2 = (np.abs(lev-levrange[0])).argmin()
        lev2 = lev[k1:k2+1]
        R = np.squeeze(Correlation[j1:j2+1, i1:i2+1, k1:k2+1, 0])
        C = np.squeeze(Covariance[j1:j2+1, i1:i2+1, k1:k2+1, 0])
    else:
        lev2 = None
        R = np.squeeze(Correlation[j1:j2+1, i1:i2+1, 0])
        C = np.squeeze(Covariance[j1:j2+1, i1:i2+1, 0])

    # return covariance and correlation grids
    return lev2, lat2, lon2, C, R
def load_DART_diagnostic_file(E,
                              date=datetime.datetime(2009, 1, 1, 1, 0, 0),
                              hostname='taurus',
                              debug=False,
                              return_single_variables=False):
    """
    Read a DART diagnostic netcdf file (e.g. files with names like modelname_Prior_Diagn.nc, etc)
    for a single date/time and for the variable and experiment specified in
    the experiment dictionary E.

    This code returns a dictionary, Dout, which holds the requested variable ('data'),
    its corresponding spatial dimension arrays ('lat', 'lon', 'lev'), 'P0', 'hybm', 'hyam',
    'units', 'long_name' and, if the variable defines one, 'FillValue'.

    To get these as single variables, set the input parameter return_single_variables to True;
    the function then returns the tuple lev2, lat2, lon2, VVout, P0, hybm, hyam.
    This will be deprecated eventually when all other visualization codes are changed to deal
    with single variables.

    INPUTS:
    E: an experiment dictionary; the entries 'exp_name', 'diagn', 'variable', 'copystring',
        'extras', 'latrange', 'lonrange' and (for 3d variables) 'levrange' are read.
        a copystring of the form 'ensemble sample N' selects N randomly chosen ensemble members.
    date: the date to load, passed on to es.find_paths.
    hostname: computer name, passed on to es.find_paths.
    debug: if True, print progress information.

    RAISES:
    RuntimeError if the diagnostic file (or, for E['extras'] == 'MSE', the truth file)
    cannot be found, or if neither the requested variable nor a known alternative name
    is present in the file.
    """

    def _read_copy_metadata(nc_file, expt):
        # names of the copies in nc_file; if the file does not store them,
        # look them up for that experiment
        if 'CopyMetaData' in nc_file.variables:
            raw = nc_file.variables['CopyMetaData'][:]
            return [raw[ii, ].tobytes().decode("utf-8").rstrip() for ii in range(len(raw))]
        return es.get_expt_CopyMetaData_state_space(expt)

    # if debugging, print out what we're doing
    if debug:
        print('+++++++++++++++++++++++++++++++++++++++++')
        print("Retrieving experiment " + E['exp_name'])
        print("for diagnostic " + E['diagn'])
        print("variable " + E['variable'])
        if isinstance(date, str):
            datestr = date
        else:
            datestr = date.strftime("%Y-%m-%d")
        print("and date " + datestr)
        print('+++++++++++++++++++++++++++++++++++++++++')

    # retrieve the entries of the experiment dictionary, E:
    variable = E['variable']
    extras = E['extras']

    # a list of 2d variables -- if the var is 2d, don't need to load vertical levels
    # TODO: add other 2d variables to this list
    variables_2d = ['PS', 'ptrop', 'ztrop']
    is_2d = variable in variables_2d

    # this is the dictionary that holds the output; it is always created because the
    # variable's metadata is recorded in it below, whichever output form is requested
    Dout = dict()

    # find the file for this run
    # this requires running a subroutine called `find_paths`, stored in a module `experiment_datails`,
    # but written by each user -- it should take an experiment dictionary and the hostname
    # as input, and return the filepath that corresponds to the desired field, diagnostic, etc.
    filename = es.find_paths(E, date, 'diag', hostname=hostname, debug=debug)
    if not os.path.exists(filename):
        raise RuntimeError(
            "load_DART_diagn_file can't find files that look like " + filename)

    if debug:
        print('opening file ' + filename)
    f = Dataset(filename, 'r')
    try:
        if is_2d:
            # don't need info about hybrid model levels if the variable is 2d
            lev = None
            P0 = None
            hybm = None
            hyam = None
        else:
            # TODO: add a check so that hybrid model level info is only loaded
            # for models with hybrid vertical levels
            lev = f.variables['lev'][:]
            P0 = f.variables['P0'][:]
            hybm = f.variables['hybm'][:]
            hyam = f.variables['hyam'][:]

        # load CopyMetaData if available
        CopyMetaData = _read_copy_metadata(f, E)

        # load the requested dynamical variable - these can have different names, so
        # first check for the requested variable, and if it's not found, try alternatives
        if variable in f.variables:
            varname_load = variable
        else:
            # here is a dictionary that holds alternative variable names to try
            possible_varnames_dict = {
                'T': ['t', 'var130'],
                'TS': ['t', 'var130'],
                'U': ['u', 'var131'],
                'US': ['u', 'var131'],
                'V': ['v', 'var132'],
                'VS': ['v', 'var132'],
                'Z': ['z', 'var129'],
                'geopotential': ['z', 'var129'],
                'GPH': ['Z', 'z', 'var129'],
                'var129': ['Z', 'z', 'var129'],
                'msl': ['var151'],
                'mslp': ['var151'],
                'ztrop': ['ptrop'],
                'Nsq': ['brunt'],
            }
            varname_load = None
            # if several alternatives are present, the last one in the list is used
            for varname in possible_varnames_dict.get(variable, []):
                if varname in f.variables:
                    varname_load = varname
            # if the desired variable is still not found, throw an error and abort
            if varname_load is None:
                raise RuntimeError('Unable to find variable ' + variable +
                                   ' in file ' + filename)

        V = f.variables[varname_load]

        # 'US' and 'VS' use the alternative latitude / longitude arrays 'slat' / 'slon'
        if variable == 'US':
            lat = f.variables['slat'][:]
        else:
            lat = f.variables['lat'][:]
        if variable == 'VS':
            lon = f.variables['slon'][:]
        else:
            lon = f.variables['lon'][:]

        #------finding which copies to retrieve
        if not isinstance(E['copystring'], list):
            copies = None
            # if the diagnostic is the Truth, then the copy string can only be one thing
            if E['diagn'] == 'Truth':
                copies = get_copy(f, CopyMetaData, 'true state')
            # if we want the ensemble variance or std, copystring has to be the ensemble spread
            if extras in ('ensemble variance', 'ensemble variance scaled', 'ensemble std'):
                copies = get_copy(f, CopyMetaData, 'ensemble spread')
            # if requesting the entire ensemble, find the copies that contain
            # the string 'ensemble member'
            if E['copystring'] == 'ensemble':
                copies = [
                    get_copy(f, CopyMetaData, cs) for cs in CopyMetaData
                    if 'ensemble member' in cs
                ]
            # we can also request a sample of the total ensemble
            if 'ensemble sample' in E['copystring']:
                copies2 = [
                    get_copy(f, CopyMetaData, cs) for cs in CopyMetaData
                    if 'ensemble member' in cs
                ]
                try:
                    n = int(E['copystring'].split(' ')[2])
                except (ValueError, IndexError):
                    # the sample size is missing or is not a number
                    print('Warning: the copystring ' + E['copystring'] +
                          ' isnt valid. Returning 2 ensemble members instead.')
                    n = 2
                copies = np.random.choice(copies2, size=n, replace=False)
            # if none of the above apply, just choose whatever is in copystring
            if copies is None:
                copies = [
                    get_copy(f, CopyMetaData, E['copystring'], debug=debug)
                ]
        else:
            copies = [
                get_copy(f, CopyMetaData, cstring)
                for cstring in E['copystring']
            ]
        #------done finding which copies to retrieve

        # record the variable's metadata in the output dictionary
        try:
            Dout['units'] = V.units
        except AttributeError:
            Dout['units'] = ''
        try:
            Dout['long_name'] = V.long_name
        except AttributeError:
            Dout['long_name'] = ''

        # figure out which vertical level range we want
        if is_2d:
            lev2 = None
        else:
            levrange = E['levrange']
            k1 = (np.abs(lev - levrange[1])).argmin()
            k2 = (np.abs(lev - levrange[0])).argmin()
            lev2 = lev[k1:k2 + 1]

        # figure out which latitude range we want
        latrange = E['latrange']
        j2 = (np.abs(lat - latrange[1])).argmin()
        j1 = (np.abs(lat - latrange[0])).argmin()
        lat2 = lat[j1:j2 + 1]

        # figure out which longitude range we want
        lonrange = E['lonrange']
        i2 = (np.abs(lon - lonrange[1])).argmin()
        i1 = (np.abs(lon - lonrange[0])).argmin()
        lon2 = lon[i1:i2 + 1]

        # now read in only the part of the variable within the lat, lon, and lev bounds
        # note that this assumes output shaped like time x copy x lat x lon x lev
        # TODO: is there a way to make this more agnostic?
        # note also that we only choose the first time -- this works well with DART output
        # that have one time instance in each file, only. Again, another TODO would be to make
        # this more grid-agnostic.
        if is_2d:
            VV = V[0, copies, j1:j2 + 1, i1:i2 + 1]
        else:
            VV = V[0, copies, j1:j2 + 1, i1:i2 + 1, k1:k2 + 1]

        # also record the netcdf fill value in the array
        if hasattr(V, '_FillValue'):
            VV = np.ma.masked_values(VV, V._FillValue)
            Dout['FillValue'] = V._FillValue

        # the ensemble size has to be read while the file is still open
        if extras == 'ensemble variance scaled':
            N = get_ensemble_size(f)
    finally:
        # close the primary file, also when something above failed
        f.close()

    #------------extra computations
    # if tropopause altitude (ztrop) was requested, we retrieved tropopause pressure --
    # convert it here
    if variable == 'ztrop':
        H = 7.0  # 7 km scale height
        if np.max(VV) > 1000.0:  # in this case, pressure is in Pa
            P0 = 1.0E5
        else:
            P0 = 1.0E3
        VVpress = VV
        VV = H * np.log(P0 / VVpress)

    if extras == 'ensemble variance':
        # if the ensemble variance was requested, square the ensemble spread
        VVout = np.square(VV)
    elif extras == 'ensemble variance scaled':
        # square the ensemble spread and scale by ensemble size
        if debug:
            print('squaring and scaling ensemble spread to get scaled variance')
        fac = float(N + 1) / N
        VVout = fac * np.square(VV)
    else:
        VVout = VV

    # if requesting the mean square error (MSE), load the corresponding truth run
    # and subtract it out, then square
    if extras == 'MSE':
        Etr = E.copy()
        Etr['diagn'] = 'Truth'
        filename_truth = es.find_paths(Etr, date, 'truth', hostname=hostname, debug=debug)
        if not os.path.exists(filename_truth):
            # a missing truth file is treated like a missing diagnostic file
            raise RuntimeError("+++cannot find files that look like " + filename_truth)
        if debug:
            print('opening file ' + filename_truth)

        # open the truth file and load the field
        ft = Dataset(filename_truth, 'r')
        try:
            VT = ft.variables[variable]
            # select the true state as the right copy
            # NOTE(review): get_copy is called here the same way as for the primary file
            # (file, copy names, copy string) -- confirm against get_copy's signature
            copyt = get_copy(ft, _read_copy_metadata(ft, Etr), 'true state', debug=debug)
            if is_2d:
                VT2 = VT[0, copyt, j1:j2 + 1, i1:i2 + 1]
            else:
                VT2 = VT[0, copyt, j1:j2 + 1, i1:i2 + 1, k1:k2 + 1]
        finally:
            ft.close()

        # compute the square error
        VVout = np.square(VV - VT2)

    if return_single_variables:
        return lev2, lat2, lon2, VVout, P0, hybm, hyam

    Dout['data'] = VVout
    Dout['lev'] = lev2
    Dout['lat'] = lat2
    Dout['lon'] = lon2
    Dout['P0'] = P0
    Dout['hybm'] = hybm
    Dout['hyam'] = hyam
    return Dout
def load_DART_obs_epoch_file(E, date_in=None, hostname='taurus', debug=False):
    """
    Read an obs_epoch_XXX.nc file for a DART experiment and return the requested
    copies of the requested observation types.

    INPUTS:
    E: an experiment dictionary. The entries read here are
        E['obs_name']: one observation type name, or a list of names
            (trailing whitespace is stripped before the lookup).
        E['copystring']: a single copy string, or a list of copy strings.
            In a list, 'ensemble' selects every copy whose status is
            "ensemble member", a string containing 'ensemble member ' plus a
            number selects that member, and any other string selects the copies
            whose metadata contains it.
        E['diagn']: the requested diagnostic; in the list case it is compared
            against the labels 'Prior', 'Posterior', 'Truth' built below.
        E['daterange']: only used when date_in is None.
    date_in: the date for which to load the obs. The default is None, in which
        case the first entry of E['daterange'] is used.
    hostname: computer name, passed on to es.find_paths - default is 'taurus'
    debug: debugging flag; default is False.

    OUTPUTS:
    If the file does not exist, the 2-tuple (None, None) is returned.
    Otherwise a 7-tuple:
        obs_out: the selected observation values (one column per copy when
            E['copystring'] is a list)
        copy_names: the name(s) of the returned copies
        obs_names_out: the obs type metadata entry of every returned observation
        lons_list, lats_list, levs_list: columns 0, 1 and 2 of `location` for
            every returned observation
        QCdict: maps each QC metadata entry to the list of its QC values
    """

    import re  # only used to pull the member number out of a copystring

    def _as_text(raw):
        # tobytes() yields bytes; decode so substring tests against the text
        # copystrings work on Python 3 as well as Python 2
        return raw.decode('utf-8', 'replace') if isinstance(raw, bytes) else raw

    # select the date
    if date_in is None:
        date_in = E['daterange'][0]

    # find the file for this run
    # this requires running a subroutine called `find_paths`, stored in a module `experiment_datails`,
    # but written by each user -- it takes an experiment dictionary and the hostname
    # as input, and returns the filepath that corresponds to the desired field, diagnostic, etc.
    filename = es.find_paths(E,
                             date_in,
                             hostname=hostname,
                             file_type='obs_epoch',
                             debug=debug)
    if not os.path.exists(filename):
        if debug:
            print("+++cannot find files that look like " + filename +
                  ' -- returning None')
        # NOTE: this early exit has only two entries, unlike the regular return
        return None, None

    several_copies = isinstance(E['copystring'], list)

    # load the file; the try/finally makes sure it is closed even if a
    # variable is missing or a lookup fails
    f = Dataset(filename, 'r')
    try:
        if debug:
            print('Loading file ' + filename)
        observations = f.variables['observations'][:]
        copy = f.variables['copy'][:]
        location = f.variables['location'][:]
        CopyMetaData = f.variables['CopyMetaData'][:]
        ObsTypesMetaData = f.variables['ObsTypesMetaData'][:]
        obs_type = f.variables['obs_type'][:]
        QCMetaData_array = f.variables['QCMetaData'][:]
        qc = f.variables['qc'][:]
        qc_copy = f.variables['qc_copy'][:]

        # find the obs_type number(s) corresponding to the desired observations
        # note that the names could have weird spaces around them -- strip those off here
        if isinstance(E['obs_name'], list):
            obs_type_no_list = [
                get_obs_type_number(f, obs_type_string.rstrip())
                for obs_type_string in E['obs_name']
            ]
        else:
            obs_type_no_list = [
                get_obs_type_number(f, E['obs_name'].rstrip())
            ]

        if not several_copies:
            # only one copy number to get -- cc tells us the number of it
            # note also that the prior and posterior diagnostics are not available
            # for everything, i.e. the observations themselves
            if 'observation' in E['copystring']:
                cc = get_copy(f, E['copystring'])
            else:
                cc = get_copy(f, E['diagn'].lower() + ' ' + E['copystring'])
        else:
            # if we have to retrieve more than one copy,
            # expand "CopyMetaData" into lists that hold ensemble status and diagnostic
            diagn = []
            ens_status = []
            CMD = []       # raw metadata entries, returned to the caller
            CMD_text = []  # decoded entries, used for substring matching
            for icopy in copy:
                raw = CopyMetaData[icopy - 1, ].tobytes()
                temp = _as_text(raw)
                CMD.append(raw.rstrip())
                CMD_text.append(temp)
                # NOTE(review): these tests are independent, so a copy named
                # 'observation error variance' also matches 'observation' and
                # contributes two entries to diagn/ens_status -- left as is;
                # confirm that this copy always comes last in the file.
                if 'prior' in temp:
                    diagn.append('Prior')
                if 'posterior' in temp:
                    diagn.append('Posterior')
                if 'truth' in temp:
                    diagn.append('Truth')
                    ens_status.append('Truth')
                if 'observation' in temp:
                    diagn.append('')
                    ens_status.append('Observation')
                if 'ensemble member' in temp:
                    ens_status.append('ensemble member')
                if 'ensemble mean' in temp:
                    ens_status.append('ensemble mean')
                if 'ensemble spread' in temp:
                    ens_status.append('ensemble spread')
                if 'observation error variance' in temp:
                    ens_status.append(None)
                    diagn.append(None)
    finally:
        f.close()

    #------locations, quality control, and observation codes for requested obs types

    # empty lists to hold various traits of the observations that fit the requested obs types
    iobs = []
    obs_codes = []
    lons = []
    lats = []
    levs = []

    # create a dictionary to hold all available Quality Control flags
    QCMetaData = [QCMD.tobytes() for QCMD in QCMetaData_array]
    QCdict = {k: [] for k in QCMetaData}

    # loop over the requested obs types and store the traits of the matching obs
    if debug:
        print('this is the list of obs type numbers')
        print(obs_type_no_list)
    for OTN in obs_type_no_list:
        # observation numbers of all obs that fit this obs type; atleast_1d keeps
        # this a 1-d array even when exactly one observation matches
        idx = np.atleast_1d(np.squeeze(np.where(obs_type == OTN)))
        if debug:
            temp = ObsTypesMetaData[OTN - 1, :].tobytes()
            print('these obs indices match obs of type ' +
                  temp.decode('UTF-8'))
            print(idx)
        iobs.append(list(idx))
        obs_codes.append(obs_type[idx])
        lons.append(location[idx, 0])
        lats.append(location[idx, 1])
        levs.append(location[idx, 2])

        # loop over all available QC flags and store in the dict
        # note that DART QC copies start at 1, but python indices start at 0
        for iqc, qcname in zip(qc_copy, QCMetaData):
            QCdict[qcname].append(qc[idx, iqc - 1])

    # we now have several lists (as many as the number of obs types we requested)
    # of lists --> turn them into a single list each
    iobs2 = [ii for sublist in iobs for ii in sublist]
    obs_codes_list = [ii for sublist in obs_codes for ii in sublist]
    lons_list = [ii for sublist in lons for ii in sublist]
    lats_list = [ii for sublist in lats for ii in sublist]
    levs_list = [ii for sublist in levs for ii in sublist]
    for qcname in QCMetaData:
        QCdict[qcname] = [ii for sublist in QCdict[qcname] for ii in sublist]

    if debug:
        print('retrieving ' + str(len(iobs2)) + ' observations')

    # instead of obs number codes, return strings that identify the obs
    obs_names_out = [
        ObsTypesMetaData[obs_code - 1].tobytes()
        for obs_code in obs_codes_list
    ]

    #------observation values for requested copies of the requested observations
    if not several_copies:
        # in this case only a single copy, which is defined in E, is returned
        obs_out = observations[iobs2, cc]
        copy_names = E['diagn'].lower() + ' ' + E['copystring']
        return obs_out, copy_names, obs_names_out, lons_list, lats_list, levs_list, QCdict

    # in this case several copies are returned
    iensstatus = []
    for CS in E['copystring']:

        # ensemble member names are stored weirdly in DART output -- convert here
        if 'ensemble member ' in CS:
            ensindex = re.sub(r'ensemble member*', '', CS).strip()
            # NOTE(review): both branches assign the same string here; confirm the
            # intended padding against the CopyMetaData entries of the DART output
            if int(ensindex) < 10:
                spacing = ' '
            else:
                spacing = ' '
            CS = "ensemble member" + spacing + str(ensindex)
        if debug:
            print('looking for copy ' + CS)

        # compare by value: identity (`is`) on a string literal is not reliable
        if CS == 'ensemble':
            # in this case look for all the copies that have ensemble status = "ensemble member"
            indices = [
                i for i, x in enumerate(ens_status) if x == 'ensemble member'
            ]
        else:
            # for all other copystrings, just look for the CopyMetaData entries that contain that copystring
            indices = [i for i, x in enumerate(CMD_text) if CS in x]

        if debug:
            print('here are the copy indices that fit this copystring')
            print(indices)
        iensstatus.extend(indices)

    # this is the list of copies with the right ensemble status
    iensstatus.sort()
    # this is the list of copies with the right diagnostic
    idiagn = [i for i, x in enumerate(diagn) if x == E['diagn']]
    if debug:
        print('here are the copy indices that fit the requested diagnostic')
        print(idiagn)

    # we are interested in the indices that appear in both iensstatus and idiagn
    sdiagn = set(idiagn)
    jj = [val for val in iensstatus if val in sdiagn]
    if debug:
        print(
            'here are the copy indices that fit both the requested copystrings and the requested diagnostic'
        )
        print(jj)
        print('this corresponds to the following:')
        for j in jj:
            print(CMD[j])

    # now select the observations corresponding to these copies
    # (rows first, then columns, so that the result is obs x copies)
    obs1 = observations[iobs2, :]
    obs_out = obs1[:, jj]
    copy_names = [CMD[i] for i in jj]

    return obs_out, copy_names, obs_names_out, lons_list, lats_list, levs_list, QCdict
def load_covariance_file(E, date, hostname='taurus', debug=False):
    """
    Load a pre-computed file of state-to-observation covariances and
    correlations, and cut out the region requested in E.

    INPUTS:
    E: an experiment dictionary. The entries read here are
        E['variable']: the state variable; 'PS' is treated as a field
            without a vertical dimension
        E['latrange'], E['lonrange']: two-element ranges; the grid points
            closest to each end are used as the limits
        E['levrange']: two-element range, only read when E['variable'] != 'PS'
        (E is also handed to es.find_paths, which locates the file.)
    date: the date passed to es.find_paths
    hostname: computer name, passed to es.find_paths - default is 'taurus'
    debug: if True, print a message when the file cannot be found

    OUTPUTS:
    lev2, lat2, lon2, C, R: the selected levels (None for 'PS'), latitudes and
    longitudes, followed by the covariance and correlation arrays on that
    region. If the file does not exist, five Nones are returned instead.
    """

    # find the file for this run
    # this requires running a subroutine called `find_paths`, stored in a module `experiment_datails`,
    # but written by each user -- it takes an experiment dictionary and the hostname
    # as input, and returns the filepath that corresponds to the desired field, diagnostic, etc.
    filename = es.find_paths(E,
                             date,
                             file_type='covariance',
                             hostname=hostname)
    if not os.path.exists(filename):
        if debug:
            print("+++cannot find files that look like " + filename +
                  ' -- returning None')
        return None, None, None, None, None

    has_levels = E['variable'] != 'PS'

    # load the netcdf file; try/finally closes it even if a variable is missing
    f = Dataset(filename, 'r')
    try:
        lat = f.variables['lat'][:]
        lon = f.variables['lon'][:]
        if has_levels:
            lev = f.variables['lev'][:]
        Correlation = f.variables['Correlation'][:]
        Covariance = f.variables['Covariance'][:]
    finally:
        f.close()

    # figure out which latitude range we want
    latrange = E['latrange']
    j1 = (np.abs(lat - latrange[0])).argmin()
    j2 = (np.abs(lat - latrange[1])).argmin()
    lat2 = lat[j1:j2 + 1]

    # figure out which longitude range we want
    lonrange = E['lonrange']
    i1 = (np.abs(lon - lonrange[0])).argmin()
    i2 = (np.abs(lon - lonrange[1])).argmin()
    lon2 = lon[i1:i2 + 1]

    # the last axis is indexed at 0 and squeezed out -- for now.
    # Might make this longer than 1 later
    if not has_levels:
        lev2 = None
        R = np.squeeze(Correlation[j1:j2 + 1, i1:i2 + 1, 0])
        C = np.squeeze(Covariance[j1:j2 + 1, i1:i2 + 1, 0])
    else:
        # figure out which vertical level range we want
        # (the slice starts at the level closest to levrange[1])
        levrange = E['levrange']
        k1 = (np.abs(lev - levrange[1])).argmin()
        k2 = (np.abs(lev - levrange[0])).argmin()
        lev2 = lev[k1:k2 + 1]
        R = np.squeeze(Correlation[j1:j2 + 1, i1:i2 + 1, k1:k2 + 1, 0])
        C = np.squeeze(Covariance[j1:j2 + 1, i1:i2 + 1, k1:k2 + 1, 0])

    # return covariance and correlation grids
    return lev2, lat2, lon2, C, R
def load_DART_obs_epoch_file_as_dataframe(E,
                                          date=datetime.datetime(
                                              2009, 1, 1, 0, 0, 0),
                                          obs_type_list=['ERP_PM1', 'ERP_LOD'],
                                          ens_status_list=['ensemble member'],
                                          hostname='taurus',
                                          debug=False):
    """
    Read in a DART obs epoch file, defined by its date and the Experiment E,
    and return it as a Pandas data frame holding all the observations that have
    the ensemble status and obs types given in ens_status_list and
    obs_type_list, respectively. The frame is indexed by ObsIndex and has one
    row per (observation, copy) pair.

    this should eventually replace the SR load_DART_obs_epoch_file

    INPUTS:
    E: an experiment dictionary. E['diagn'] selects the diagnostic
        ('Prior', 'Posterior', 'Truth' or 'Observation' as labelled below);
        E['copystring'] is only read when ens_status_list is None.
    date: the date passed to es.find_paths; it is also written to the
        'Date' column of every row
    obs_type_list: names of the observation types to select
    ens_status_list: ensemble statuses to select ('ensemble member',
        'ensemble mean', 'ensemble spread', 'Truth', 'Observation');
        None means "use E['copystring']"
    hostname: computer name, passed to es.find_paths - default is 'taurus'
    debug: debugging flag; default is False.

    OUTPUTS:
    A pandas DataFrame with the columns QualityControl, DARTQualityControl,
    Value, Latitude, Longitude, Level, Date and CopyName, or None if the file
    does not exist.

    The list defaults are never modified inside this function.
    """

    def _as_text(raw):
        # tobytes() yields bytes; decode so substring tests against text work
        # on Python 3 as well as Python 2
        return raw.decode('utf-8', 'replace') if isinstance(raw, bytes) else raw

    # find the file for this run
    # this requires running a subroutine called `find_paths`, stored in a module `experiment_datails`,
    # but written by each user -- it takes an experiment dictionary and the hostname
    # as input, and returns the filepath that corresponds to the desired field, diagnostic, etc.
    filename = es.find_paths(E, date, hostname=hostname)
    if not os.path.exists(filename):
        if debug:
            print("+++cannot find files that look like " + filename +
                  ' -- returning None')
        return None

    # load the file; try/finally closes it even if a variable is missing
    f = Dataset(filename, 'r')
    try:
        CopyMetaData = f.variables['CopyMetaData'][:]
        observations = f.variables['observations'][:]
        copy = f.variables['copy'][:]
        obs_type = f.variables['obs_type'][:]
        location = f.variables['location'][:]
        ObsIndex = f.variables['ObsIndex'][:]
        qc = f.variables['qc'][:]

        # find the obs_type numbers corresponding to the desired observations
        obs_type_no_list = [
            get_obs_type_number(f, obs_type_string)
            for obs_type_string in obs_type_list
        ]

        # expand "CopyMetaData" into lists that hold ensemble status and diagnostic
        diagn = []
        ens_status = []
        CMD = []

        # loop over the copy meta data and record the ensemble status and diagnostic for each copy
        for icopy in copy:
            raw = CopyMetaData[icopy - 1, ].tobytes()
            temp = _as_text(raw)
            CMD.append(raw.rstrip())
            if 'prior' in temp:
                diagn.append('Prior')
            if 'posterior' in temp:
                diagn.append('Posterior')
            if 'truth' in temp:
                diagn.append('Truth')
                ens_status.append('Truth')
            if 'observations' in temp:
                diagn.append('Observation')
                ens_status.append('Observation')
            if 'ensemble member' in temp:
                ens_status.append('ensemble member')
            if 'ensemble mean' in temp:
                ens_status.append('ensemble mean')
            if 'ensemble spread' in temp:
                ens_status.append('ensemble spread')
            if 'observation error variance' in temp:
                ens_status.append(None)
                diagn.append(None)
    finally:
        f.close()

    # indices of all observations whose type is one of the requested ones
    if debug:
        print('selecting the following obs type numbers')
        print(obs_type_no_list)
    iobs = []
    for OTN in obs_type_no_list:
        # np.where returns a tuple -- its first entry is the array of indices
        iobs.extend(np.where(obs_type == OTN)[0])

    # select the copies corresponding to the right ensemble status
    # (or just copystring if the list isn't given) and diagnostic
    if ens_status_list is None:
        ens_status_list = [E['copystring']]
    if debug:
        print(ens_status_list)

    # this is the (sorted) list of copies with the right ensemble status
    iensstatus = sorted(i for ES in ens_status_list
                        for i, x in enumerate(ens_status) if x == ES)
    # this is the set of copies with the right diagnostic
    sdiagn = {i for i, x in enumerate(diagn) if x == E['diagn']}

    # we are interested in the indices that appear in both
    cc = [val for val in iensstatus if val in sdiagn]
    if debug:
        print(
            'these are the copies that suit both the requested ensemble status and the requested diagnostic:'
        )
        print(cc)

    # given the above copy numbers, find the names that suit them
    copynames = [CMD[ii] for ii in cc]

    # now select the observations corresponding to the selected copies and obs types
    # the explicit integer dtype keeps an empty selection usable as an index
    i1 = np.array(iobs, dtype=int)
    i2 = np.array(cc, dtype=int)
    nobs = len(i1)
    ncopies = len(i2)
    L = nobs * ncopies  # length of the data vector

    # obs_select is (obs x copies); flattening it row by row means the copies of
    # one observation are adjacent in the output vector
    obs_select = observations[i1[:, None], i2]
    obs_out = np.reshape(obs_select, (L, ))

    # for the arrays that are only defined by obs index, replicate each entry
    # once per copy -- this matches the row-by-row order of obs_out and stays
    # 1-d even when L == 1
    location_select = location[i1, ]
    lon_out = np.repeat(location_select[:, 0], ncopies)
    lat_out = np.repeat(location_select[:, 1], ncopies)
    lev_out = np.repeat(location_select[:, 2], ncopies)
    qc1_out = np.repeat(qc[i1, 0], ncopies)
    qc2_out = np.repeat(qc[i1, 1], ncopies)
    ObsIndex_out = np.repeat(ObsIndex[i1], ncopies)
    date_out = np.repeat(date, L)

    # for each of the selected obs, report the copystring of each of its copies
    copynames_out = copynames * nobs

    # round the location values because otherwise pandas messes up the
    # categorical variable aspect of them
    lat_out = np.round(lat_out, 1)
    lon_out = np.round(lon_out, 1)
    lev_out = np.round(lev_out)

    # return data frame
    data = {
        'QualityControl': qc1_out,
        'DARTQualityControl': qc2_out,
        'Value': obs_out,
        'Latitude': lat_out,
        'Longitude': lon_out,
        'Level': lev_out,
        'Date': date_out,
        'CopyName': copynames_out
    }
    DF = pd.DataFrame(data, index=ObsIndex_out)

    return DF