def sum_swe_mod(lats, lons, swe, basin, model, scenario, month):
    import numpy as np
    import math
    from snowpack_functions import lat_lon_adjust, get_dist, calc_area, unpack_netcdf_swe_month, mask_latlon
    resol = 0.0625
    swe_yearly_agg = np.ndarray(shape=(len(swe), 1), dtype=float)
    ## get historical SWE for the model so that the historical mean can be determined for the mask
    lats_hist, lons_hist, swe_hist, datess_hist = unpack_netcdf_swe_month(basin, model, 'historical', month)
    ## list of [lat, lon] pairs included in the historical mean mask
    lats_lons_inc_in_mask = list()
    ## loop through the array and get latitude, longitude and SWE values for each grid cell
    count_appending_latslons = 0
    for i in np.arange(len(swe)):  ## loop over year
        count_appending_latslons += 1
        ## initialize the SWE total for each year to zero
        swe_gridcell_total = 0
        for j in np.arange(len(lats)):  ## loop over latitude
            for k in np.arange(len(lons)):  ## loop over longitude
                ## don't calculate area for missing-value elements
                if not math.isnan(swe[i, j, k]):
                    ## remove additional grid cells according to mask_latlon (boxes) and lat_lon_adjust (later masks)
                    if_in_box = mask_latlon(lats[j], lons[k], basin)
                    adjust_mask = lat_lon_adjust(lats[j], lons[k], basin)
                    if if_in_box and adjust_mask:
                        ## calculate mean historical SWE
                        historical_sum_swe = 0
                        for year in np.arange(len(swe_hist)):
                            historical_sum_swe += swe_hist[year, j, k]
                        mean_swe = historical_sum_swe / len(swe_hist)
                        ## exclude grid cells with mean historical SWE < 10 mm
                        if mean_swe >= 10:
                            ## calculate the area of the grid cell
                            cellarea = calc_area(lats[j], lons[k], resol)
                            ## SWE in the grid cell: SWE (mm) * cell area, with a unit-adjustment factor
                            swe_gridcell = cellarea * (swe[i, j, k] * 0.000001)
                            ## sum up SWE over the basin
                            swe_gridcell_total += swe_gridcell
                            ## only add lats/lons to the list on the first pass over years
                            if count_appending_latslons < 2:
                                points = [lats[j], lons[k]]
                                lats_lons_inc_in_mask.append(points)
        ## yearly aggregate April 1 snowpack
        swe_yearly_agg[i] = swe_gridcell_total
    ## save arrays to a file for a multimodel average; the path depends on the scenario
    filearrayname = '/raid9/gergel/agg_snowpack/%s/%s_%s_%s.npz' % (scenario, model, basin, month)
    np.savez(filearrayname, swe=swe_yearly_agg, latslons=np.asarray(lats_lons_inc_in_mask))
    return (swe_yearly_agg, lats_lons_inc_in_mask)
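## A hypothetical invocation of sum_swe_mod, assuming unpack_netcdf_swe_month supplies the
## scenario fields; the basin/model/scenario/month labels here are placeholders, not values
## from the original script.
from snowpack_functions import unpack_netcdf_swe_month
basin, model, scenario, month = 'cascades', 'CNRM-CM5', 'rcp85', '4'  ## assumed labels
lats, lons, swe, datess = unpack_netcdf_swe_month(basin, model, scenario, month)
swe_agg, latslons = sum_swe_mod(lats, lons, swe, basin, model, scenario, month)
print("aggregate SWE computed for %d years" % len(swe_agg))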
## further mask out lat/lons that aren't part of the masks defined by lat_lon_adjust and mask_latlon
direc = '/raid9/gergel/agg_snowpack/snotel_vic/vic_output/%s' % basin
site_ids = list()
for filename in os.listdir(direc):  ## get list of snotel site ids
    site_ids.append(filename)
if "11H59S" in site_ids:  ## this is a missing snotel station in the Southern Rockies
    site_ids.remove("11H59S")
arr_site_ids = np.asarray(site_ids)
vic_swe = list()
for site in arr_site_ids:
    direcsite = '/raid9/gergel/agg_snowpack/snotel_vic/vic_output/%s/%s/fluxes__*' % (basin, site)
    for pathfile in glob.glob(direcsite):
        path, fname = os.path.split(pathfile)
        elev, lat, lon = get_snotel_elevation(site)
        snow_band, lat, lon = get_snow_band(fname, elev)  ## get which snow band to use for the snotel elevation
        mask1 = lat_lon_adjust(float(lat), float(lon), basin)
        mask2 = mask_latlon(float(lat), float(lon), basin)
        if mask1 and mask2:  ## apply further masking: include the grid cell only if it is within the mask
            ## snow bands 0 through 3 map to flux-file columns 3 through 6; anything higher reads column 7
            col = 3 + min(int(snow_band), 4)
            data = np.loadtxt(pathfile, dtype='float', usecols=(col,), delimiter='\t')
            vic_swe.append(data[:])  ## add to the VIC SWE list
## step 3: average over all vic simulations (see the sketch below)
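## A minimal sketch of the averaging step named above, assuming every series in vic_swe has
## the same daily length; hypothetical, not from the original script.
vic_swe_arr = np.asarray(vic_swe)              ## shape: (n_sites, n_days)
vic_swe_avg = np.nanmean(vic_swe_arr, axis=0)  ## mean across simulations for each day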
## get historical SWE data to determine which lats/lons to include in the analysis
lats, lons, swe_hist, datess_swe_hist = unpack_netcdf_gen("SWE", basin, "historical")
## get RD, SD, TR historical classifications
for j in np.arange(len(lats)):  ## loop over latitude
    for k in np.arange(len(lons)):  ## loop over longitude
        for i in np.arange(len(swe)):  ## loop over year
            ## list for temperature averages
            temp_average = list()
            ## don't calculate area for missing-value elements
            if not math.isnan(swe[i, j, k]):
                ## remove additional grid cells according to mask_latlon (boxes) and lat_lon_adjust (later masks)
                if_in_box = mask_latlon(lats[j], lons[k], basin)
                adjust_mask = lat_lon_adjust(lats[j], lons[k], basin)
                if if_in_box and adjust_mask:
                    ## calculate mean historical SWE
                    historical_sum_swe = 0
                    for year in np.arange(len(swe_hist)):
                        historical_sum_swe += swe_hist[year, j, k]
                    mean_swe = historical_sum_swe / len(swe_hist)
                    ## exclude grid cells with mean historical SWE < 10 mm
                    if mean_swe >= 10:
                        ind = i * 5
                        ## five-year means of maximum and minimum temperature
                        temp_maxx = (temp_max[ind, j, k] + temp_max[ind + 1, j, k] +
                                     temp_max[ind + 2, j, k] + temp_max[ind + 3, j, k] +
                                     temp_max[ind + 4, j, k]) / 5
                        temp_minn = (temp_min[ind, j, k] + temp_min[ind + 1, j, k] +
                                     temp_min[ind + 2, j, k] + temp_min[ind + 3, j, k] +
                                     temp_min[ind + 4, j, k]) / 5
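## A hypothetical continuation sketching the RD/SD/TR (rain-dominant, snow-dominant,
## transitional) classification named above; the mean-temperature thresholds are assumptions,
## not values from the original script.
temp_avg = (temp_maxx + temp_minn) / 2.0  ## mean temperature over the five-year window
if temp_avg < -1.0:
    classification = "SD"  ## snow-dominant
elif temp_avg > 1.0:
    classification = "RD"  ## rain-dominant
else:
    classification = "TR"  ## transitional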
    petshort = petshort[237:-74, :, :]
    pettall = pettall[237:-74, :, :]
    evap = evap[237:-74, :, :]
    transp = transp[237:-74, :, :]
else:
    petnat = petnat[45:-2, :, :]
    petshort = petshort[45:-2, :, :]
    pettall = pettall[45:-2, :, :]
    evap = evap[45:-2, :, :]
    transp = transp[45:-2, :, :]
for j in np.arange(len(lats)):  ## loop over latitude
    for k in np.arange(len(lons)):  ## loop over longitude
        ## don't calculate area for missing-value elements
        if not math.isnan(swe[0, j, k]):
            if_in_box = mask_latlon(lats[j], lons[k], basin)
            adjust_mask = lat_lon_adjust(lats[j], lons[k], basin)
            mean_swe = historical_sum_swe(j, k)  ## new historical SWE function based on Livneh instead of VIC simulations
            if if_in_box and adjust_mask and mean_swe:
                petsum = list()
                aetsum = list()
                if scenario == "historical":
                    for i in np.arange(30):  ## now loop over year
                        ind = i * 12
                        ## annual PET: sum the monthly natural, short and tall vegetation PET
                        petsum.append(np.sum(np.asarray(petnat[ind:ind + 12, j, k])) +
                                      np.sum(np.asarray(petshort[ind:ind + 12, j, k])) +
                                      np.sum(np.asarray(pettall[ind:ind + 12, j, k])))
                        ## annual AET: sum the monthly evaporation and transpiration
                        aetsum.append(np.sum(np.asarray(evap[ind:ind + 12, j, k])) +
                                      np.sum(np.asarray(transp[ind:ind + 12, j, k])))
                    ## reshape both yearly series to column vectors
                    pet_agg.append(np.asarray(petsum).reshape(len(petsum), 1))
                    aet_agg.append(np.asarray(aetsum).reshape(len(aetsum), 1))
                    lats_inc.append(lats[j])
                    lons_inc.append(lons[k])
                else:
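## A minimal sketch of the historical_sum_swe(j, k) helper referenced above, assuming a
## Livneh-derived array of historical mean SWE named swe_livneh_mean; the array name and the
## 10 mm threshold (mirroring the masks used elsewhere) are assumptions.
def historical_sum_swe(j, k):
    ## True if the Livneh-based historical mean SWE for this grid cell is at least 10 mm
    return swe_livneh_mean[j, k] >= 10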
def swe_percs(lats, lons, swe, datess, elev_corr_info, basin, scenario):
    import numpy as np
    import math
    from snowpack_functions import lat_lon_adjust, get_dist, calc_area, unpack_netcdf_swe_ensavg, mask_latlon, get_elev_for_lat_lon
    ## initialize lists for the elevation percentiles
    swe_10 = list()
    swe_25 = list()
    swe_50 = list()
    swe_75 = list()
    swe_90 = list()
    ## get historical SWE for the model so that the historical mean can be determined for the mask
    lats_hist, lons_hist, swe_hist, datess_hist = unpack_netcdf_swe_ensavg(basin, 'historical')
    for i in np.arange(len(swe)):  ## loop over year
        ## start new lists of SWE and elevation points every year
        swe_inc = list()
        elev_inc = list()
        ## initialize the SWE total for the year to zero
        swe_tot = 0
        for j in np.arange(len(lats)):  ## loop over latitude
            for k in np.arange(len(lons)):  ## loop over longitude
                ## don't calculate area for missing-value elements
                if not math.isnan(swe[i, j, k]):
                    ## remove additional grid cells according to mask_latlon (boxes) and lat_lon_adjust (later masks)
                    if_in_box = mask_latlon(lats[j], lons[k], basin)
                    adjust_mask = lat_lon_adjust(lats[j], lons[k], basin)
                    if if_in_box and adjust_mask:
                        ## calculate mean historical SWE
                        historical_sum_swe = 0
                        for year in np.arange(len(swe_hist)):
                            historical_sum_swe += swe_hist[year, j, k]
                        mean_swe = historical_sum_swe / len(swe_hist)
                        ## exclude grid cells with mean historical SWE < 10 mm
                        if mean_swe >= 10:
                            ## record SWE and elevation for this grid cell in the given year
                            swe_inc.append(swe[i, j, k])
                            elev_inc.append(get_elev_for_lat_lon(elev_corr_info, lats[j], lons[k]))
                            ## running total of SWE for the given year
                            swe_tot += swe[i, j, k]
        ## sort elevations ascending and reorder the SWE values the same way
        elev_sorted = sorted(elev_inc)
        swe_sorted = [s for (e, s) in sorted(zip(elev_inc, swe_inc))]
        print("the length of swe_sorted is %d" % len(swe_sorted))
        ## index lists for percentiles of the cumulative SWE distribution
        p10 = list()
        p25 = list()
        p50 = list()
        p75 = list()
        p90 = list()
        swe_cumsum = np.cumsum(swe_sorted)
        for num in np.arange(len(swe_cumsum)):
            if swe_cumsum[num] >= 0.9 * swe_tot:
                p90.append(num)
            elif swe_cumsum[num] >= 0.75 * swe_tot:
                p75.append(num)
            elif swe_cumsum[num] >= 0.5 * swe_tot:
                p50.append(num)
            elif swe_cumsum[num] >= 0.25 * swe_tot:
                p25.append(num)
            elif swe_cumsum[num] >= 0.1 * swe_tot:
                p10.append(num)
        ## the elevation at the first index crossing each threshold is the percentile elevation
        swe_10.append(elev_sorted[np.min(p10)])
        swe_25.append(elev_sorted[np.min(p25)])
        swe_50.append(elev_sorted[np.min(p50)])
        swe_75.append(elev_sorted[np.min(p75)])
        swe_90.append(elev_sorted[np.min(p90)])
    return (datess, swe_10, swe_25, swe_50, swe_75, swe_90)
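## A hypothetical call to swe_percs, assuming the SWE fields come from unpack_netcdf_swe_ensavg
## and elev_corr_info from the elevation lookup used elsewhere in the repo; a sketch, not part
## of the original script.
from snowpack_functions import unpack_netcdf_swe_ensavg
lats, lons, swe, datess = unpack_netcdf_swe_ensavg(basin, scenario)
datess, e10, e25, e50, e75, e90 = swe_percs(lats, lons, swe, datess, elev_corr_info, basin, scenario)
print("elevation below which half of year-0 SWE accumulates: %f" % e50[0])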
def swe_elevation_mean(lats, lons, swe, datess, elev_corr_info, basin, scenario):
    import numpy as np
    import math
    from snowpack_functions import unpack_netcdf_file_var, get_elev_for_lat_lon, mask_latlon, lat_lon_adjust
    ## get historical SWE for the model so that the historical mean can be determined for the mask
    direc = '/raid9/gergel/agg_snowpack/goodleap/%s' % basin
    file_hist = 'SWE_ensavg_%s_%s.nc' % ("historical", basin)
    file = 'SWE_ensavg_%s_%s.nc' % (scenario, basin)
    lats_hist, lons_hist, swe_hist, datess_hist = unpack_netcdf_file_var(direc, file_hist, "swe")
    ## list for latitude and longitude values included in the historical mean mask
    lats_lons_inc_in_mask = list()
    ## initialize lists for storing grid cell elevations
    if scenario == "historical":
        swe_inc = list()
        elev_inc = list()
    else:
        swe_1 = list()
        elev_1 = list()
        swe_2 = list()
        elev_2 = list()
        swe_3 = list()
        elev_3 = list()
    count = 0
    ## loop through the array and get latitude, longitude and SWE values for each grid cell
    for j in np.arange(len(lats)):  ## loop over latitude
        for k in np.arange(len(lons)):  ## loop over longitude
            ## don't calculate area for missing-value elements; isnan converts masked values to
            ## nan's, so this statement evaluates to False if the value exists (is not masked)
            historical_mean_swe = 0  ## historical mean SWE for the grid cell
            if not math.isnan(swe[0, j, k]):
                if_in_box = mask_latlon(lats[j], lons[k], basin)
                adjust_mask = lat_lon_adjust(lats[j], lons[k], basin)
                if if_in_box and adjust_mask:
                    historical_sum_swe = 0
                    for year in np.arange(len(swe_hist)):
                        historical_sum_swe += swe_hist[year, j, k]
                    historical_mean_swe = historical_sum_swe / len(swe_hist)
                    if historical_mean_swe >= 10:
                        count += 1
                        ## get the elevation of the grid cell
                        elevation_individual_gridcell = get_elev_for_lat_lon(elev_corr_info, lats[j], lons[k])
                        ## add SWE and elevation values to lists depending on scenario
                        if scenario == "historical":
                            sum_swe = 0
                            for year in np.arange(len(swe)):  ## calculate mean SWE
                                sum_swe += swe[year, j, k]
                            mean_swe = sum_swe / len(swe)
                            mean_swe = mean_swe * 0.001  ## convert mm to meters
                            swe_inc.append(mean_swe)
                            elev_inc.append(elevation_individual_gridcell)
                        else:
                            ## mean SWE over each of three 30-year periods
                            sum_swe = 0
                            for year in np.arange(4, 34):
                                sum_swe += swe[year, j, k]
                            mean_swe = sum_swe / len(np.arange(4, 34))
                            mean_swe = mean_swe * 0.001  ## convert mm to meters
                            swe_1.append(mean_swe)
                            elev_1.append(elevation_individual_gridcell)
                            sum_swe = 0
                            for year in np.arange(34, 64):
                                sum_swe += swe[year, j, k]
                            mean_swe = sum_swe / len(np.arange(34, 64))
                            mean_swe = mean_swe * 0.001  ## convert mm to meters
                            swe_2.append(mean_swe)
                            elev_2.append(elevation_individual_gridcell)
                            sum_swe = 0
                            for year in np.arange(64, 94):
                                sum_swe += swe[year, j, k]
                            mean_swe = sum_swe / len(np.arange(64, 94))
                            mean_swe = mean_swe * 0.001  ## convert mm to meters
                            swe_3.append(mean_swe)
                            elev_3.append(elevation_individual_gridcell)
    print(count)
    ## save arrays to a file for a multimodel average (and for spatial plots with lats and lons)
    filearrayname = '/raid9/gergel/agg_snowpack/elevations/ensavg_%s_%s.npz' % (basin, scenario)
    if scenario == "historical":
        np.savez(filearrayname, swe=np.asarray(swe_inc), elevations=np.asarray(elev_inc))
        return (swe_inc, elev_inc)
    else:
        np.savez(filearrayname, swe1=np.asarray(swe_1), swe2=np.asarray(swe_2), swe3=np.asarray(swe_3),
                 elev1=np.asarray(elev_1), elev2=np.asarray(elev_2), elev3=np.asarray(elev_3))
        return (swe_1, swe_2, swe_3, elev_1, elev_2, elev_3)
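## A hypothetical call for a future scenario, assuming unpack_netcdf_file_var returns the
## ensemble-average SWE for that scenario from the same goodleap directory; a sketch, not
## part of the original script.
direc = '/raid9/gergel/agg_snowpack/goodleap/%s' % basin
fname = 'SWE_ensavg_%s_%s.nc' % (scenario, basin)
lats, lons, swe, datess = unpack_netcdf_file_var(direc, fname, "swe")
swe_1, swe_2, swe_3, elev_1, elev_2, elev_3 = swe_elevation_mean(lats, lons, swe, datess,
                                                                 elev_corr_info, basin, scenario)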
base = datetime.datetime(1987, 1, 1)
## end date + 1 (will only produce up to the specified end date - 1)
end_date = datetime.datetime(2006, 1, 1)
arr_dates = [base + datetime.timedelta(days=i) for i in range(0, (end_date - base).days)]
direc_snotel = '/raid9/gergel/vic_sim_obs/snotel_data/US_swe'
snotel_swe = list()
#snotel_swe = np.ndarray(shape=(len(arr_site_ids),len(arr_dates)),dtype=float)
rowcount = 0
for site in arr_site_ids:
    snotel_site_swe = list()
    snotel_dates = list()
    print(site)
    filename = 'swe.%s.dat' % site
    elev, lat, lon = get_snotel_elevation(site)  ## get elevation of the snotel site
    lat_sno, lon_sno = find_gridcell(float(lat), float(lon))  ## figure out which grid cell the snotel site is in
    mask3 = lat_lon_adjust(float(lat_sno), float(lon_sno), basin)  ## apply first lat/lon mask
    mask4 = mask_latlon(float(lat_sno), float(lon_sno), basin)  ## apply second lat/lon mask
    if mask3 and mask4:
        snotel_data = np.loadtxt(os.path.join(direc_snotel, filename), dtype='str', delimiter='\t')
        for day in np.arange(len(snotel_data)):
            eachday = snotel_data[day].split()
            if float(eachday[0][:4]) >= 1987 and float(eachday[0][:4]) <= 2005:
                snotel_dates.append(datetime.datetime.strptime(eachday[0], '%Y%m%d'))
                snotel_site_swe.append(float(eachday[1]))
        arr_snotel_site_swe = np.asarray(snotel_site_swe)
        print(len(arr_snotel_site_swe))
        arr_snotel_site_swe[arr_snotel_site_swe < 0] = np.nan  ## change -99 values in SWE to nan
        ## deal with missing values using a pandas merge against the full date range
        df_full = pd.DataFrame({'cola': arr_dates})
        df_part = pd.DataFrame({'cola': snotel_dates, 'swe': arr_snotel_site_swe.tolist()})
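## A minimal sketch completing the merge described above: left-joining the partial snotel record
## onto the full 1987-2005 daily index leaves nan where the record has gaps. The how='left'
## choice is an assumption; the variable names continue the fragment.
df_merged = pd.merge(df_full, df_part, on='cola', how='left')
snotel_swe.append(df_merged['swe'].values)  ## daily series aligned to arr_dates, gaps as nan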