Example #1
0
def Output_files(New_combined, myBaseforResults, Site_ID, versionID):
    """Write one tab-separated REddyProc (MPI online tool) file per year.

    For every calendar year present in the index of ``New_combined`` a file
    ``REddyProc_<Site_ID>_<year>_<versionID>.txt`` is written to
    ``<myBaseforResults>/REddyProc``.  Each file starts with a line of
    column names and a line of units, followed by one row per record
    (missing values written as -9999, floats with three decimals).

    Side effect: the columns ``RH_Con`` and ``VPD_hPa_Con`` are added to
    (or overwritten in) ``New_combined``.

    Args:
        New_combined: DataFrame whose index labels are datetime-like; it
            must contain the columns Fc, Fe, Fh, Fg, Ta_Con, Ts_Con,
            Ah_Con and ustar.
        myBaseforResults: base output directory (created when missing).
        Site_ID: site identifier used in the output file names.
        versionID: version string used in the output file names.
    """
    print("Starting output for EddyProc MPI online tool")

    # Check for place to put results - does it exist? If not create it.
    mypathforResults = os.path.join(myBaseforResults, "REddyProc")
    if not os.path.isdir(mypathforResults):
        os.makedirs(mypathforResults)

    # Everything below up to the loop is independent of the year, so it is
    # done once on the whole data set.

    # Calculate RH_con
    New_combined['RH_Con'] = metfuncs.RHfromabsolutehumidity(
        New_combined['Ah_Con'], New_combined['Ta_Con'])

    # VPD = es - e, from absolute humidity (g/m3) and air temperature (C).
    # The difference is in kPa and the tool wants hPa, i.e. a factor of 10.
    # The previous expression "es - e / 10" divided only e (operator
    # precedence) and divided where a multiplication is needed.
    vpd_kPa = metfuncs.es(New_combined['Ta_Con']) - metfuncs.vapourpressure(
        New_combined['Ah_Con'], New_combined['Ta_Con'])
    New_combined['VPD_hPa_Con'] = vpd_kPa * 10

    # Work on an explicit copy so the helper columns never touch (or warn
    # about touching) New_combined.
    REddyProc_DF = New_combined[[
        'Fc', 'Fe', 'Fh', 'Fg', 'Ta_Con', 'Ts_Con', 'RH_Con',
        'VPD_hPa_Con', 'ustar'
    ]].copy()

    # The date/time components are separated into columns: year, day of
    # year and decimal hour.  Hour 0 is the first hour of the day, so a day
    # transition reads (day, 23.5) --> (day + 1, 0.0).
    REddyProc_DF['DTcopy'] = REddyProc_DF.index
    REddyProc_DF['Year'] = REddyProc_DF['DTcopy'].apply(lambda x: x.year)
    REddyProc_DF['Day'] = REddyProc_DF['DTcopy'].apply(
        lambda x: x.timetuple().tm_yday)
    REddyProc_DF['Hour'] = REddyProc_DF['DTcopy'].apply(
        lambda x: x.hour + x.minute / 60.0)

    columns_out = [
        'Year', 'Day', 'Hour', 'Fc', 'Fe', 'Fh', 'Fg', 'Ta_Con', 'Ts_Con',
        'RH_Con', 'VPD_hPa_Con', 'ustar'
    ]
    newline1 = 'Year \t DoY \t Hour \t NEE \t LE \t H \t Rg \t Tair \t Tsoil \t rH \t VPD \t Ustar'
    newline2 = " -- \t -- \t -- \t umolm-2s-1 \t Wm-2 \t Wm-2 \t Wm-2 \t degC \t degC \t % \t hPa \t ms-1"

    # Grouping by the function itself (not a one-element list) keeps the
    # group key a plain year value.
    for year, _ in New_combined.groupby(lambda x: x.year):
        print(year)

        # Select the current year only
        year_DF = REddyProc_DF[REddyProc_DF['Year'] == year]

        n_datapoints = len(year_DF)
        startdate = year_DF.index[0]
        enddate = year_DF.index[n_datapoints - 1]
        print("%s %s %s" % (n_datapoints, startdate, enddate))

        file_tail = Site_ID + '_' + str(year) + '_' + versionID + '.txt'
        output_temp_filename = os.path.join(
            mypathforResults, 'REddyProc_temp_' + file_tail)
        output_filename = os.path.join(
            mypathforResults, 'REddyProc_' + file_tail)

        # Select the output columns by indexing; this works regardless of
        # how the installed pandas spells the to_csv column keyword.
        year_DF[columns_out].to_csv(output_temp_filename,
                                    sep='\t',
                                    na_rep='-9999',
                                    float_format='%.3f',
                                    header=False,
                                    index=False)

        # Prepend the names and units lines, then copy every data row.
        try:
            with open(output_temp_filename) as infile:
                with open(output_filename, "w") as outfile:
                    outfile.write(newline1 + "\n")
                    outfile.write(newline2 + "\n")
                    for line in infile:
                        outfile.write(line)
        finally:
            os.remove(output_temp_filename)

    #####################
    # Finish up
    ######################

    print("FINISHED writing out files for use in EddyProc MPI online tool ")
Example #2
0
def Output_files(New_combined,myBaseforResults,Site_ID,versionID,Ws_variable_name):
    """Write one daily-aggregated NASA SMAP CSV file per calendar year.

    The records of each year are aggregated per day of year (means and
    sums), QC fractions are added, carbon and radiation variables are
    converted to daily totals, and the result is written to
    ``<myBaseforResults>/NASA_out/NASA_SMAP_<Site_ID>_<year>_<versionID>.csv``
    below a names line and a units line (missing values as -9999).

    Side effect: the columns ``RH_Con``, ``VPD_kPa_Con``, ``DTcopy`` and
    ``Year`` are added to (or overwritten in) ``New_combined``.

    Args:
        New_combined: DataFrame whose index labels are timestamps exposing
            ``year`` and ``dayofyear``; it must contain the data and
            ``*_QCFlag`` columns selected below.
        myBaseforResults: base output directory (created when missing).
        Site_ID: site identifier, written to every row and used in the
            file names.
        versionID: version string used in the file names.
        Ws_variable_name: "Ws_CSAT" selects the ``Ws_CSAT_Con`` wind speed
            column, anything else selects ``Ws_Con``.
    """
    def day_of_year(stamp):
        # Grouping key: day of year of an index label.
        return stamp.dayofyear

    # Do any calculations on the whole dataset before grouping
    # Calculate RH_con
    New_combined['RH_Con'] = metfuncs.RHfromabsolutehumidity(
        New_combined['Ah_Con'], New_combined['Ta_Con'])
    # VPD = es - e, from absolute humidity (g/m3) and air temperature (C)
    New_combined['VPD_kPa_Con'] = metfuncs.es(
        New_combined['Ta_Con']) - metfuncs.vapourpressure(
            New_combined['Ah_Con'], New_combined['Ta_Con'])

    # Keep a copy of the timestamp and the year as ordinary columns
    New_combined['DTcopy'] = New_combined.index
    New_combined['Year'] = New_combined['DTcopy'].apply(lambda x: x.year)

    print("Starting output for NASA")
    # Check for place to put results - does it exist? If not create it.
    mypathforResults = os.path.join(myBaseforResults, "NASA_out")
    if not os.path.isdir(mypathforResults):
        os.makedirs(mypathforResults)

    # Subset the DF to make it easier.
    # WD is not required for NASA; only the wind speed column name differs
    # between sites.
    if Ws_variable_name == "Ws_CSAT":
        wind_column = 'Ws_CSAT_Con'
    else:
        wind_column = 'Ws_Con'
    subset_columns = [
        'DTcopy', 'Year', 'Ah_Con', 'Cc', 'eta', 'Fa', 'Fc_ustar', 'GPP_Con',
        'Fre_Con', 'Fe_Con', 'Fg_Con', 'Fh_Con', 'Fld_Con', 'Flu_Con', 'Fm',
        'Fn_Con', 'Fsd_Con', 'Fsu_Con', 'ps_Con', 'Precip_Con', 'Sws_Con',
        'Ta_Con', 'Ts_Con', 'ustar', wind_column, 'RH_Con', 'VPD_kPa_Con',
        'Ah_Con_QCFlag', 'Fc_Con_QCFlag', 'Fe_Con_QCFlag', 'Fg_Con_QCFlag',
        'Fh_Con_QCFlag', 'Fld_Con_QCFlag', 'Flu_Con_QCFlag', 'Fn_Con_QCFlag',
        'Fsd_Con_QCFlag', 'Fsu_Con_QCFlag', 'ps_Con_QCFlag',
        'Precip_Con_QCFlag', 'Sws_Con_QCFlag', 'Ta_Con_QCFlag',
        'Ts_Con_QCFlag'
    ]
    all_years_DF = New_combined[subset_columns]

    # Output QC column -> QC flag column it is derived from.
    # NEE, GPP and Reco all use the Fc flag, as before.
    qc_sources = [
        ('Rn_qc', 'Fn_Con_QCFlag'),
        ('Rs_qc', 'Fsd_Con_QCFlag'),
        ('Ta_qc', 'Ta_Con_QCFlag'),
        ('VPD_qc', 'Ah_Con_QCFlag'),
        ('Ts_qc', 'Ts_Con_QCFlag'),
        ('NEE_qc', 'Fc_Con_QCFlag'),
        ('GPP_qc', 'Fc_Con_QCFlag'),
        ('Reco_qc', 'Fc_Con_QCFlag'),
    ]

    # NOTE(review): the header lines name 25 fields (last one SNOWD) but
    # only 24 data columns are written - confirm what the consumer expects.
    newline2 = "ID, Year, Mo, Day, DOY, Rn_f, Rn_qc, Rs_f, Rs_qc, Ta, Ta_qc, VPD, VPD_qc, Ts_f, Ts_qc, PREC, SWC, NEE, NEE_qc, GPP, GPP_qc, Reco, Reco_qc, PRESS, SNOWD"
    newline3 = "-, -, -, -, -, MJ m-2 day-1, -, MJ m-2 day-1, -, oC, -, kPa, -, oC, -, mm day-1, m3/m3, gC m-2 day-1, -, gC m-2 day-1, -, gC m-2 day-1, -, MPa day-1, mm"
    columns_out = [
        'Site_ID', 'Year', 'Month', 'Day', 'DOY', 'Fn_Con_mean', 'Rn_qc',
        'Fsd_Con_mean', 'Rs_qc', 'Ta_Con_mean', 'Ta_qc', 'VPD_kPa_Con_mean',
        'VPD_qc', 'Ts_Con_mean', 'Ts_qc', 'Precip_Con_sum', 'Sws_Con_mean',
        'Fc_ustar_mean', 'NEE_qc', 'GPP_Con_mean', 'GPP_qc', 'Fre_Con_mean',
        'Reco_qc', 'ps_Con_mean'
    ]

    # Grouping by the function itself (not a one-element list) keeps the
    # group key a plain year value.
    for year, _ in New_combined.groupby(lambda x: x.year):
        print(year)

        # Select the current year only
        REddyProc_DF = all_years_DF[all_years_DF['Year'] == year]

        n_datapoints = len(REddyProc_DF)
        startdate = REddyProc_DF.index[0]
        enddate = REddyProc_DF.index[n_datapoints - 1]
        print("%s %s %s" % (n_datapoints, startdate, enddate))

        # Calculate the DAILY means/sums from the half hourly data.
        # The timestamp copy is not a quantity to average or add up, so it
        # is left out of the aggregation explicitly.
        numeric_DF = REddyProc_DF.drop('DTcopy', axis=1)
        tempDF_mean = numeric_DF.groupby(day_of_year).mean().add_suffix('_mean')
        tempDF_sum = numeric_DF.groupby(day_of_year).sum().add_suffix('_sum')
        tempDF = tempDF_mean.join(tempDF_sum, how='left')

        # Add QC fractions: a good, not gap filled value has flag 1.  Count
        # those per day and divide by the 48 half hour periods in a day.
        # Days without any good value get 0; 48.0 guards against integer
        # division.
        for qc_column, flag_column in qc_sources:
            is_good = (REddyProc_DF[flag_column] == 1.).astype(float)
            tempDF[qc_column] = is_good.groupby(day_of_year).sum() / 48.0

        # Add a site label to the rows
        tempDF['Site_ID'] = Site_ID

        # First timestamp of each day gives the calendar fields.
        # Jan the 1st is day 1.
        tempDF['DTmean'] = REddyProc_DF['DTcopy'].groupby(day_of_year).min()
        tempDF['Day'] = tempDF['DTmean'].apply(lambda x: x.day)
        tempDF['Month'] = tempDF['DTmean'].apply(lambda x: x.month)
        tempDF['Year'] = tempDF['DTmean'].apply(lambda x: x.year)
        tempDF['DOY'] = tempDF['DTmean'].apply(lambda x: x.dayofyear)

        # Carbon variables: mean umol m-2 s-1 -> g C m-2 day-1
        for carbon_column in ('Fc_ustar_mean', 'GPP_Con_mean', 'Fre_Con_mean'):
            tempDF[carbon_column] = tempDF[carbon_column] * 60 * 60 * 24 / 1000000.0 * 12

        # Radiation variables: mean W m-2 -> MJ m-2 day-1
        for radiation_column in ('Fsd_Con_mean', 'Fn_Con_mean'):
            tempDF[radiation_column] = tempDF[radiation_column] * 60 * 60 * 24 / 1000000.0

        file_tail = Site_ID + '_' + str(year) + '_' + versionID + '.csv'
        output_temp_filename = os.path.join(
            mypathforResults, 'NASA_SMAP_temp_' + file_tail)
        output_filename = os.path.join(
            mypathforResults, 'NASA_SMAP_' + file_tail)
        tempDF[columns_out].to_csv(output_temp_filename, na_rep='-9999',
                                   float_format='%.3f', header=False,
                                   index=False)

        # Prepend the names and units lines, then copy every data row.
        try:
            with open(output_temp_filename) as infile:
                with open(output_filename, "w") as outfile:
                    outfile.write(newline2 + "\n")
                    outfile.write(newline3 + "\n")
                    for line in infile:
                        outfile.write(line)
        finally:
            os.remove(output_temp_filename)

    #####################
    # Finish up
    ######################

    print("FINISHED writing out files for use for NASA ")
Example #3
0
def Output_files(New_combined,myBaseforResults,Site_ID,versionID):
    """Write one comma-separated WAVES input file per calendar year.

    For every calendar year present in the index of ``New_combined`` a file
    ``WAVES_<Site_ID>_<year>_<versionID>.csv`` is written to
    ``<myBaseforResults>/WAVES``.  Each file starts with a units line and a
    column-names line, followed by one row per record (timestamp first,
    missing values written as -9999, floats with three decimals).

    Side effect: the columns ``RH_Con`` and ``VPD_hPa_Con`` are added to
    (or overwritten in) ``New_combined``.

    Args:
        New_combined: DataFrame whose index labels are datetime-like; it
            must contain the data columns selected below.
        myBaseforResults: base output directory (created when missing).
        Site_ID: site identifier used in the output file names.
        versionID: version string used in the output file names.
    """
    print("Starting output for WAVES")

    # Check for place to put results - does it exist? If not create it.
    mypathforResults = os.path.join(myBaseforResults, "WAVES")
    if not os.path.isdir(mypathforResults):
        os.makedirs(mypathforResults)

    # Everything below up to the loop is independent of the year, so it is
    # done once on the whole data set.

    # Calculate RH_con
    New_combined['RH_Con'] = metfuncs.RHfromabsolutehumidity(
        New_combined['Ah_Con'], New_combined['Ta_Con'])

    # VPD = es - e, from absolute humidity (g/m3) and air temperature (C).
    # The difference is in kPa and the output wants hPa, i.e. a factor of
    # 10.  The previous expression "es - e / 10" divided only e (operator
    # precedence) and divided where a multiplication is needed.
    vpd_kPa = metfuncs.es(New_combined['Ta_Con']) - metfuncs.vapourpressure(
        New_combined['Ah_Con'], New_combined['Ta_Con'])
    New_combined['VPD_hPa_Con'] = vpd_kPa * 10

    data_columns = [
        'Ah_Con', 'Cc', 'eta', 'Fa', 'Fc_Con', 'Fe_Con', 'Fg_Con', 'Fh_Con',
        'Fld_Con', 'Flu_Con', 'Fm', 'Fn_Con', 'Fsd_Con', 'Fsu_Con', 'ps_Con',
        'Precip_Con', 'Sws_Con', 'Sws_5cma', 'Sws_50cma', 'Ta_Con', 'theta',
        'Ts_Con', 'ustar', 'Ws_CSAT_Con', 'Wd_CSAT', 'RH_Con', 'VPD_hPa_Con'
    ]
    # Work on an explicit copy so the helper columns never touch (or warn
    # about touching) New_combined.
    REddyProc_DF = New_combined[data_columns].copy()
    REddyProc_DF['DTcopy'] = REddyProc_DF.index
    # Only used to pick the rows of one year; not written to the file.
    REddyProc_DF['Year'] = REddyProc_DF['DTcopy'].apply(lambda x: x.year)

    # NOTE(review): the last two units ("hPa,frac") appear swapped relative
    # to the last two column names ("RH_Con,VPD_hPa_Con") - confirm with
    # the consumer of these files before changing either line.
    newline2 = "DSN,g/m3,mg/m3,deg,W/m2,umol/m2/s,W/m2,W/m2,W/m2,W/m2,W/m2,kg/m/s2,W/m2,W/m2,W/m2,kPa,mm,frac,frac,frac,C,deg,C,m/s,m/s,deg,hPa,frac"
    newline3 = "TIMESTAMP,Ah_Con,Cc,eta,Fa,Fc_Con,Fe_Con,Fg_Con,Fh_Con,Fld_Con,Flu_Con,Fm,Fn_Con,Fsd_Con,Fsu_Con,ps_Con,Precip_Con,Sws_Con,Sws_5cm,Sws_50cm,Ta_Con,theta,Ts_Con,ustar,Ws_CSAT_Con,Wd_CSAT,RH_Con,VPD_hPa_Con"

    columns_out = ['DTcopy'] + data_columns

    # Grouping by the function itself (not a one-element list) keeps the
    # group key a plain year value.
    for year, _ in New_combined.groupby(lambda x: x.year):
        print(year)

        # Select the current year only
        year_DF = REddyProc_DF[REddyProc_DF['Year'] == year]

        n_datapoints = len(year_DF)
        startdate = year_DF.index[0]
        enddate = year_DF.index[n_datapoints - 1]
        print("%s %s %s" % (n_datapoints, startdate, enddate))

        file_tail = Site_ID + '_' + str(year) + '_' + versionID + '.csv'
        output_temp_filename = os.path.join(
            mypathforResults, 'WAVES_temp_' + file_tail)
        output_filename = os.path.join(mypathforResults, 'WAVES_' + file_tail)

        # Select the output columns by indexing; this works regardless of
        # how the installed pandas spells the to_csv column keyword.
        year_DF[columns_out].to_csv(output_temp_filename, na_rep='-9999',
                                    float_format='%.3f', header=False,
                                    index=False)

        # Prepend the units and names lines, then copy every data row.
        try:
            with open(output_temp_filename) as infile:
                with open(output_filename, "w") as outfile:
                    outfile.write(newline2 + "\n")
                    outfile.write(newline3 + "\n")
                    for line in infile:
                        outfile.write(line)
        finally:
            os.remove(output_temp_filename)

    #####################
    # Finish up
    ######################

    print("FINISHED writing out files for use for WAVES ")
def Output_files(New_combined,myBaseforResults,Site_ID,versionID):
    """Write one comma-separated WAVES input file per calendar year.

    For every calendar year present in the index of ``New_combined`` a file
    ``WAVES_<Site_ID>_<year>_<versionID>.csv`` is written to
    ``<myBaseforResults>/WAVES``.  Each file starts with a units line and a
    column-names line, followed by one row per record (timestamp first,
    missing values written as -9999, floats with three decimals).

    Side effect: the columns ``RH_Con`` and ``VPD_hPa_Con`` are added to
    (or overwritten in) ``New_combined``.

    Args:
        New_combined: DataFrame whose index labels are datetime-like; it
            must contain the data columns selected below.
        myBaseforResults: base output directory (created when missing).
        Site_ID: site identifier used in the output file names.
        versionID: version string used in the output file names.
    """
    print("Starting output for WAVES")

    # Check for place to put results - does it exist? If not create it.
    mypathforResults = os.path.join(myBaseforResults, "WAVES")
    if not os.path.isdir(mypathforResults):
        os.makedirs(mypathforResults)

    # Everything below up to the loop is independent of the year, so it is
    # done once on the whole data set.

    # Calculate RH_con
    New_combined['RH_Con'] = metfuncs.RHfromabsolutehumidity(
        New_combined['Ah_Con'], New_combined['Ta_Con'])

    # VPD = es - e, from absolute humidity (g/m3) and air temperature (C).
    # The difference is in kPa and the output wants hPa, i.e. a factor of
    # 10.  The previous expression "es - e / 10" divided only e (operator
    # precedence) and divided where a multiplication is needed.
    vpd_kPa = metfuncs.es(New_combined['Ta_Con']) - metfuncs.vapourpressure(
        New_combined['Ah_Con'], New_combined['Ta_Con'])
    New_combined['VPD_hPa_Con'] = vpd_kPa * 10

    data_columns = [
        'Ah_Con', 'Cc', 'eta', 'Fa', 'Fc_Con', 'Fe_Con', 'Fg_Con', 'Fh_Con',
        'Fld_Con', 'Flu_Con', 'Fm', 'Fn_Con', 'Fsd_Con', 'Fsu_Con', 'ps_Con',
        'Precip_Con', 'Sws_Con', 'Sws_05', 'Sws_50', 'Ta_Con', 'theta',
        'Ts_Con', 'ustar', 'Ws_CSAT_Con', 'Wd_CSAT', 'RH_Con', 'VPD_hPa_Con'
    ]
    # Work on an explicit copy so the helper columns never touch (or warn
    # about touching) New_combined.
    REddyProc_DF = New_combined[data_columns].copy()
    REddyProc_DF['DTcopy'] = REddyProc_DF.index
    # Only used to pick the rows of one year; not written to the file.
    REddyProc_DF['Year'] = REddyProc_DF['DTcopy'].apply(lambda x: x.year)

    # NOTE(review): the last two units ("hPa,frac") appear swapped relative
    # to the last two column names ("RH_Con,VPD_hPa_Con") - confirm with
    # the consumer of these files before changing either line.
    newline2 = "DSN,g/m3,mg/m3,deg,W/m2,umol/m2/s,W/m2,W/m2,W/m2,W/m2,W/m2,kg/m/s2,W/m2,W/m2,W/m2,kPa,mm,frac,frac,frac,C,deg,C,m/s,m/s,deg,hPa,frac"
    newline3 = "TIMESTAMP,Ah_Con,Cc,eta,Fa,Fc_Con,Fe_Con,Fg_Con,Fh_Con,Fld_Con,Flu_Con,Fm,Fn_Con,Fsd_Con,Fsu_Con,ps_Con,Precip_Con,Sws_Con,Sws_5cm,Sws_50cm,Ta_Con,theta,Ts_Con,ustar,Ws_CSAT_Con,Wd_CSAT,RH_Con,VPD_hPa_Con"

    columns_out = ['DTcopy'] + data_columns

    # Grouping by the function itself (not a one-element list) keeps the
    # group key a plain year value.
    for year, _ in New_combined.groupby(lambda x: x.year):
        print(year)

        # Select the current year only
        year_DF = REddyProc_DF[REddyProc_DF['Year'] == year]

        n_datapoints = len(year_DF)
        startdate = year_DF.index[0]
        enddate = year_DF.index[n_datapoints - 1]
        print("%s %s %s" % (n_datapoints, startdate, enddate))

        file_tail = Site_ID + '_' + str(year) + '_' + versionID + '.csv'
        output_temp_filename = os.path.join(
            mypathforResults, 'WAVES_temp_' + file_tail)
        output_filename = os.path.join(mypathforResults, 'WAVES_' + file_tail)

        # Select the output columns by indexing; this works regardless of
        # how the installed pandas spells the to_csv column keyword.
        year_DF[columns_out].to_csv(output_temp_filename, na_rep='-9999',
                                    float_format='%.3f', header=False,
                                    index=False)

        # The temporary file has no header line, so the two header lines go
        # in front of ALL of its lines.  Writing them in place of line 0
        # (as was done before) silently dropped the first data row of
        # every year.
        try:
            with open(output_temp_filename) as infile:
                with open(output_filename, "w") as outfile:
                    outfile.write(newline2 + "\n")
                    outfile.write(newline3 + "\n")
                    for line in infile:
                        outfile.write(line)
        finally:
            os.remove(output_temp_filename)

    #####################
    # Finish up
    ######################

    print("FINISHED writing out files for use for WAVES ")
def Output_files(New_combined,myBaseforResults,Site_ID,versionID):
    """Write one tab-separated REddyProc (MPI online tool) file per year.

    For every calendar year present in the index of ``New_combined`` a file
    ``REddyProc_<Site_ID>_<year>_<versionID>.txt`` is written to
    ``<myBaseforResults>/REddyProc``.  Each file starts with a line of
    column names and a line of units, followed by one row per record
    (missing values written as -9999, floats with three decimals).

    Side effect: the columns ``RH_Con`` and ``VPD_hPa_Con`` are added to
    (or overwritten in) ``New_combined``.

    Args:
        New_combined: DataFrame whose index labels are datetime-like; it
            must contain the columns Fc, Fe, Fh, Fg, Ta_Con, Ts_Con,
            Ah_Con and ustar.
        myBaseforResults: base output directory (created when missing).
        Site_ID: site identifier used in the output file names.
        versionID: version string used in the output file names.
    """
    print("Starting output for EddyProc MPI online tool")

    # Check for place to put results - does it exist? If not create it.
    mypathforResults = os.path.join(myBaseforResults, "REddyProc")
    if not os.path.isdir(mypathforResults):
        os.makedirs(mypathforResults)

    # Everything below up to the loop is independent of the year, so it is
    # done once on the whole data set.

    # Calculate RH_con
    New_combined['RH_Con'] = metfuncs.RHfromabsolutehumidity(
        New_combined['Ah_Con'], New_combined['Ta_Con'])

    # VPD = es - e, from absolute humidity (g/m3) and air temperature (C).
    # The difference is in kPa and the tool wants hPa, i.e. a factor of 10.
    # The previous expression "es - e / 10" divided only e (operator
    # precedence) and divided where a multiplication is needed.
    vpd_kPa = metfuncs.es(New_combined['Ta_Con']) - metfuncs.vapourpressure(
        New_combined['Ah_Con'], New_combined['Ta_Con'])
    New_combined['VPD_hPa_Con'] = vpd_kPa * 10

    # Work on an explicit copy so the helper columns never touch (or warn
    # about touching) New_combined.
    REddyProc_DF = New_combined[['Fc', 'Fe', 'Fh', 'Fg', 'Ta_Con', 'Ts_Con',
                                 'RH_Con', 'VPD_hPa_Con', 'ustar']].copy()

    # The date/time components are separated into columns: year, day of
    # year and decimal hour.  Hour 0 is the first hour of the day, so a day
    # transition reads (day, 23.5) --> (day + 1, 0.0).
    REddyProc_DF['DTcopy'] = REddyProc_DF.index
    REddyProc_DF['Year'] = REddyProc_DF['DTcopy'].apply(lambda x: x.year)
    REddyProc_DF['Day'] = REddyProc_DF['DTcopy'].apply(
        lambda x: x.timetuple().tm_yday)
    REddyProc_DF['Hour'] = REddyProc_DF['DTcopy'].apply(
        lambda x: x.hour + x.minute / 60.0)

    columns_out = ['Year', 'Day', 'Hour', 'Fc', 'Fe', 'Fh', 'Fg', 'Ta_Con',
                   'Ts_Con', 'RH_Con', 'VPD_hPa_Con', 'ustar']
    newline1 = 'Year \t DoY \t Hour \t NEE \t LE \t H \t Rg \t Tair \t Tsoil \t rH \t VPD \t Ustar'
    newline2 = " -- \t -- \t -- \t umolm-2s-1 \t Wm-2 \t Wm-2 \t Wm-2 \t degC \t degC \t % \t hPa \t ms-1"

    # Grouping by the function itself (not a one-element list) keeps the
    # group key a plain year value.
    for year, _ in New_combined.groupby(lambda x: x.year):
        print(year)

        # Select the current year only
        year_DF = REddyProc_DF[REddyProc_DF['Year'] == year]

        n_datapoints = len(year_DF)
        startdate = year_DF.index[0]
        enddate = year_DF.index[n_datapoints - 1]
        print("%s %s %s" % (n_datapoints, startdate, enddate))

        file_tail = Site_ID + '_' + str(year) + '_' + versionID + '.txt'
        output_temp_filename = os.path.join(
            mypathforResults, 'REddyProc_temp_' + file_tail)
        output_filename = os.path.join(
            mypathforResults, 'REddyProc_' + file_tail)

        # Select the output columns by indexing; this works regardless of
        # how the installed pandas spells the to_csv column keyword.
        year_DF[columns_out].to_csv(output_temp_filename, sep='\t',
                                    na_rep='-9999', float_format='%.3f',
                                    header=False, index=False)

        # The temporary file has no header line, so the two header lines go
        # in front of ALL of its lines.  Writing them in place of line 0
        # (as was done before) silently dropped the first data row of
        # every year.
        try:
            with open(output_temp_filename) as infile:
                with open(output_filename, "w") as outfile:
                    outfile.write(newline1 + "\n")
                    outfile.write(newline2 + "\n")
                    for line in infile:
                        outfile.write(line)
        finally:
            os.remove(output_temp_filename)

    #####################
    # Finish up
    ######################

    print("FINISHED writing out files for use in EddyProc MPI online tool ")
Example #6
0
def ANN_gapfill_func(myBaseforResults,New_combined,Site_ID,list_in,list_out,iterations,index_str,is_this_all,ANN_label_all,ANN_label,frequency,Use_Fc_Storage):
    """Train a feed-forward network on the complete rows and predict all rows.

    The network maps the ``list_in`` columns to the ``list_out`` columns.
    It is trained on the rows of ``New_combined`` where every one of those
    columns is present, then evaluated on every row.  The prediction for
    each output ``X`` is stored in ``New_combined`` as ``X_NN_all`` when
    ``is_this_all == True`` and as ``X_NN`` otherwise, and the diagnostic
    plots are produced under ``<myBaseforResults>/ANN``.

    Side effect: the column ``VPD_Con`` is added to (or overwritten in)
    ``New_combined`` before training.

    Args:
        myBaseforResults: base output directory (created when missing).
        New_combined: DataFrame with the driver and target columns plus
            ``Ta_Con`` and ``Ah_Con``.
        Site_ID: site identifier, passed on to the plotting helpers.
        list_in: names of the input (driver) columns.
        list_out: names of the output (target) columns.
        iterations: passed to ``train_tnc`` as ``maxfun``.
        index_str: passed on to the plotting helpers.
        is_this_all: selects the ``_NN_all`` suffix and forces the monthly
            diurnal plots when equal to True.
        ANN_label_all: ignored; kept so existing callers keep working.
        ANN_label: ignored; kept so existing callers keep working.
        frequency: "all" or "annual" enables the monthly diurnal plots; it
            is also passed on to the other plotting helpers.
        Use_Fc_Storage: not used by this function.

    Returns:
        ``New_combined`` with the prediction columns added.
    """
    ###########################################################################################################
    ##                 START MAIN CODE
    ###########################################################################################################
    # Units label for the plots.  Each name has to be tested on its own:
    # ('Fe' or 'Fh' or 'Fg') evaluates to just 'Fe', so Fh and Fg were
    # never recognised.
    if 'Fc' in list_out:
        units = "umol.m-2.s-1"
    elif any(flux in list_out for flux in ('Fe', 'Fh', 'Fg')):
        units = "W.m-2"
    else:
        units = " "

    ###### User-set IO file locations ######

    print("Starting ANN gap filling")
    # Check for place to put results - does it exist? If not create it.
    mypathforResults = myBaseforResults + "/ANN"
    if not os.path.isdir(mypathforResults):
        os.makedirs(mypathforResults)

    # We need to update VPD for input here so also need e and es.
    # Calculate vapour pressure from absolute humidity and temperature
    #  Ah - absolute humidity, g/m3
    #  Ta - air temperature, C
    New_combined['VPD_Con'] = metfuncs.es(
        New_combined['Ta_Con']) - metfuncs.vapourpressure(
            New_combined['Ah_Con'], New_combined['Ta_Con'])

    number_of_inputs = len(list_in)
    number_of_outputs = len(list_out)

    # Drop rows with any missing value so only good data is used in training
    training_DF = New_combined[list_in + list_out].dropna(how='any')
    xarray = np.array(training_DF.reset_index(drop=True))
    # Columns are ordered inputs first, outputs last
    inputs = xarray[:, :number_of_inputs]
    targets = xarray[:, number_of_inputs:]

    # Multilayer network with full connectivity: two hidden layers of 24
    # and 16 nodes.
    conec = tmlgraph((number_of_inputs, 24, 16, number_of_outputs))
    net = ffnet(conec)

    print("TRAINING NETWORK...")
    net.train_tnc(inputs, targets, maxfun=iterations, messages=1)

    # Test network (one call is enough; it also prints its own report)
    print("TESTING NETWORK...")
    output, regress = net.test(inputs, targets)
    # NOTE(review): the value printed as "R-squared" is element [0][2] of
    # the regression result - confirm against the ffnet documentation
    # whether that is r or r squared.
    print("R-squared:           %s  " % str(regress[0][2]))

    # Predicted and observed values for the training rows, used by the plots
    predicted = np.empty((len(xarray), number_of_outputs))
    for index, rowdata in enumerate(xarray):
        predicted[index] = net([rowdata[0:number_of_inputs]])
    # The observed values are simply the target columns; integer slicing
    # replaces the former float slice index, which numpy no longer accepts.
    observed = np.array(targets, dtype=float)

    ############################################
    # Generate output and return new variables
    ############################################
    # Evaluate the network once on every row, then store one column per
    # output variable.
    all_predictions = net.call(New_combined[list_in])
    for index, item in enumerate(list_out):
        if is_this_all == True:
            New_combined[str(item + "_NN_all")] = all_predictions[:, index]
        else:
            New_combined[str(item + "_NN")] = all_predictions[:, index]

    for index, item in enumerate(list_out):
        #####################################################
        #  Plots
        #####################################################
        # Plot time series of all 30 minute data
        mintimeseries_plot(mypathforResults, predicted, observed, regress, item, Site_ID, units, targets, output, list_out, index_str)
        # Plot regression of Tower versus ANN
        regressionANN2(mypathforResults, predicted, observed, regress, item, Site_ID, units, list_out, index_str)
        # Plot diurnals for every second month 6 graphs - only when enough
        # months so all or annual
        if frequency in ("all", "annual") or is_this_all == True:
            Doplots_diurnal_monthly(mypathforResults, New_combined, item, Site_ID, units, list_out, index_str, is_this_all)
        # Plot diurnals for every second month 6 graphs
        Doplots_diurnal(mypathforResults, New_combined, item, Site_ID, units, list_out, index_str, frequency)
        # Plot timeseries of monthly over all periods
        Doplots_monthly(mypathforResults, New_combined, item, Site_ID, units, list_out, index_str, frequency)

    return New_combined