-
Notifications
You must be signed in to change notification settings - Fork 1
/
trend.py
151 lines (128 loc) · 4.21 KB
/
trend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#
# calculates trend, slope or pvalue for gridded data
#
# P.Wolski
# December 2016
#
#
# use: trend.py input.nc output.nc linear|TheilSen|quantile slope|pval
#
# returns output.nc of the same lat lon dimensions as input.nc
# input.nc has to have one 3D variable with dimensions: lat,lon,time (not necessarily in this sequence)
# pval for Sen slope is calculated as pval for Mann-Kendall test
# for linear and quantreg - from analytical expressions
#
import numpy as np
from netCDF4 import Dataset
import sys, os
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLS
from statsmodels.regression.quantile_regression import QuantReg
from scipy.stats import mstats, kendalltau
#to remove duplicate warnings
import warnings
warnings.filterwarnings('ignore')
# Command-line arguments: input file, output file, trend method and the
# statistic to write out. Exit with a usage message (status 1) rather than
# an IndexError when fewer than four arguments are supplied.
if len(sys.argv) < 5:
    sys.exit("usage: trend.py input.nc output.nc "
             "linear|TheilSen|quantile slope|pval")
infile = sys.argv[1]
outfile = sys.argv[2]
trend = sys.argv[3]
what = sys.argv[4]
#
def get_TheilSen(_y, what="slope"):
    """Return a Theil-Sen trend statistic for the 1-D series ``_y``.

    The series is regressed against its own index (0, 1, ..., len(_y)-1).

    Parameters:
        _y: 1-D array-like of values. If any element is masked, NaN is
            returned without fitting. Unmasked NaNs are masked out before
            the slope fit and omitted from the Kendall test.
        what: "slope" for the Theil-Sen median slope, "intercept" for the
            matching Theil-Sen intercept; any other value (e.g. "pval")
            returns the p-value of Kendall's tau of ``_y`` against its index
            (the Mann-Kendall test).

    Returns:
        The requested statistic, or ``np.nan`` for a masked series.
    """
    if np.ma.is_masked(_y):
        return np.nan
    if what in ("slope", "intercept"):
        # theilslopes returns (slope, intercept, lower slope, upper slope)
        fit = mstats.theilslopes(np.ma.masked_invalid(_y))
        return fit[0] if what == "slope" else fit[1]
    _x = np.arange(len(_y))
    return kendalltau(_x, _y, nan_policy='omit')[1]
def get_linear(_y, what="slope"):
    """Fit an ordinary least-squares line to the 1-D series ``_y``.

    The series is regressed against its own index (0, 1, ..., len(_y)-1),
    so the data are taken to be regularly spaced and the slope is in units
    of ``_y`` per step. The p-value is the analytical one reported by the
    OLS fit.

    Parameters:
        _y: 1-D array-like of values. If any element is masked, NaN is
            returned without fitting.
        what: "slope", "pval" or "intercept"; any other value yields None.

    Returns:
        The requested statistic, ``np.nan`` for a masked series, or None
        when ``what`` is not recognised.
    """
    if np.ma.is_masked(_y):
        return np.nan
    # OLS has no intercept term unless a constant column is added explicitly
    design = sm.add_constant(np.arange(len(_y)))
    fit = sm.OLS(_y, design, missing='drop').fit()
    if what == "slope":
        return fit.params[1]
    if what == "pval":
        return fit.pvalues[1]
    if what == "intercept":
        return fit.params[0]
    return None
def get_quantreg(_y, what="slope", q=0.5):
    """Fit a quantile regression line to the 1-D series ``_y``.

    The series is regressed against its own index (0, 1, ..., len(_y)-1)
    with an intercept term.

    Parameters:
        _y: 1-D array-like of values. If any element is masked, NaN is
            returned without fitting.
        what: "slope", "pval" or "intercept"; any other value yields None.
        q: quantile to fit; the default 0.5 gives median regression.

    Returns:
        The requested statistic, ``np.nan`` for a masked series, or None
        when ``what`` is not recognised.
    """
    if not np.ma.is_masked(_y):
        # a constant column is needed for the model to include an intercept
        _x = sm.add_constant(np.arange(len(_y)))
        # pass the caller's quantile through (it used to be fixed at 0.5)
        res = QuantReg(_y, _x).fit(q=q)
        if what == "slope":
            return res.params[1]
        elif what == "pval":
            return res.pvalues[1]
        elif what == "intercept":
            return res.params[0]
    else:
        return np.nan
# Read the input netCDF file, compute the requested per-grid-cell trend
# statistic along the time axis, and write the resulting (lat, lon) field
# to the output netCDF file. Series containing masked values come out as NaN.
ncdata = Dataset(infile, "r", format='NETCDF4')
try:
    # Scan all variables for the coordinate variables and the 3-D data
    # variable; if several 3-D variables exist, the last one is used.
    latname = lonname = timename = vname = None
    for name in ncdata.variables.keys():
        vdata = ncdata.variables[name]
        if name in ("lat", "latitude"):
            latname = name
            lats = vdata[:]
            nlat = len(lats)
        if name in ("lon", "longitude"):
            lonname = name
            lons = vdata[:]
            nlon = len(lons)
        if name in ("time", "date"):
            timename = name
        if vdata.ndim == 3:
            vname = name
            data0 = vdata[:]
            print(vname)

    if vname is None:
        print("no appropriate variable found in the source file")
        sys.exit(1)
    if latname is None or lonname is None or timename is None:
        print("latitude, longitude or time variable not found in the source file")
        sys.exit(1)

    # reorder the input array, so that it is time, latitude, longitude;
    # the dimension order is taken from the data variable itself
    dims = ncdata.variables[vname].dimensions
    latindx = dims.index(latname)
    lonindx = dims.index(lonname)
    timeindx = dims.index(timename)
    data = np.moveaxis(data0, (timeindx, latindx, lonindx), (0, 1, 2))
finally:
    # everything needed has been read into memory by now
    ncdata.close()

# Every method stores its result in `res`, which is what gets written below.
if trend == "linear":
    res = np.ma.apply_along_axis(get_linear, 0, data, what=what)
elif trend == "TheilSen":
    res = np.ma.apply_along_axis(get_TheilSen, 0, data, what=what)
elif trend == "quantile":
    res = np.ma.apply_along_axis(get_quantreg, 0, data, what=what)
else:
    print("unknown trend type: " + trend)
    print("exiting...")
    sys.exit(1)

ncdataset = Dataset(outfile, "w", format='NETCDF3_CLASSIC')
try:
    ncdataset.createDimension('lon', nlon)
    ncdataset.createDimension('lat', nlat)
    longitudes = ncdataset.createVariable('lon', np.float32, ('lon',))
    latitudes = ncdataset.createVariable('lat', np.float32, ('lat',))
    latitudes.long_name = "latitude"
    latitudes.units = "degrees_north"
    longitudes.long_name = "longitude"
    longitudes.units = "degrees_east"
    longitudes[:] = lons
    latitudes[:] = lats
    # output variable is named after the statistic and method, e.g. slope_linear
    outvar = ncdataset.createVariable(what + "_" + trend, np.float32, ('lat', 'lon',))
    outvar[:, :] = res
    if what == "slope":
        outvar.units = "variable unit per time step"
    if what == "pval":
        outvar.units = "-"
finally:
    ncdataset.close()