Example #1
def legacy_wrapper(data_test0, subchunk, b_deb):
    """
    Legacy Fortran wrapper that loops over each pixel.
    """

    res = np.empty([subchunk.dim[1], 4])
    res[:] = np.nan  # NaN matrix by default

    for ii_sub in range(subchunk.dim[1]):
        #if b_deb: print('---------------')
        #if b_deb: print('ii: {}'.format(ii_sub))

        data_test = data_test0[:, ii_sub]

        ## remove tie group
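        ## (a run of equal consecutive values forms a tie group: the first
        ##  value is kept and the repeats are set to NaN so they do not bias
        ##  the rank statistics)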
        data_test[1:][np.diff(data_test) == 0.] = np.nan

        if b_deb:
            print('Data valid:', data_test.size - np.isnan(data_test).sum(),
                  '/', data_test.size)

        if 1:
            ## original Mann-Kendall test:
            bla = data_test[~np.isnan(data_test)]
            if bla.size > 0:
                #print('min/mean/max/nb/nb_unique', bla.min(), bla.mean(), bla.max(), len(bla), len(np.unique(bla)))
                #print(bla)
                if len(np.unique(bla)) == 1:
                    p, z, Sn, nx = [0, 0, 0, 0]
                else:

                    p, z, Sn, nx = m.mk_trend(len(data_test),
                                              np.arange(len(data_test)),
                                              data_test)
            else:
                p, z, Sn, nx = m.mk_trend(len(data_test),
                                          np.arange(len(data_test)), data_test)
                # if data_test is empty, the test returns (p, z, Sn, nx) = (1.0, 0.0, 0.5, 0.0)
        else:
            ## other test
            p, z, Sn, nx = [0, 0, 0, 0]
            z = data_test.mean()

        #if b_deb: print('p,z,Sn,nx', p,z,Sn,nx)

        res[ii_sub, 0] = p
        res[ii_sub, 1] = z
        res[ii_sub, 2] = Sn
        res[ii_sub, 3] = nx

    if b_deb:
        print('p,z,Sn,nx')
        print(res)

    #return (res[:,0], res[:,1], res[:,2], res[:,3])
    return res
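A minimal usage sketch (not from the original source): legacy_wrapper only reads subchunk.dim[1], so a simple namespace can stand in for the real subchunk object, assuming the Fortran module m is importable.

import numpy as np
from types import SimpleNamespace

sub = SimpleNamespace(dim=(100, 50))          # hypothetical stand-in; only dim[1] (pixel count) is used
data = np.random.rand(120, sub.dim[1])        # (time, pixels)
res = legacy_wrapper(data, sub, b_deb=False)
print(res.shape)                              # (50, 4): columns are p, z, Sn, nx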
Example #2
def mk_test(y):
    n_zero = (y == 0.0).sum()
    #print("{:.4f} / {} / {} / {}".format(np.isnan(y).sum()/len(y), np.nanmin(y), np.nanmax(y), n_zero))
    ## To avoid ties at 0.0 that make the sort step of the Fortran median
    ## computation get stuck in very long loops, add very small fake noise to
    ## all 0.0 values so that the sort performs efficiently.
    y[y == 0.0] = 1.e-6 * np.random.rand(n_zero)
    return m.mk_trend(len(y), np.arange(len(y)), y, 3)
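A standalone illustration of the jitter trick above (pure NumPy, no Fortran module needed): after the replacement all former zeros are distinct, so a comparison sort no longer crawls through long tie groups.

import numpy as np

y = np.array([0.0, 0.0, 1.5, 0.0, 2.0])
n_zero = (y == 0.0).sum()
y[y == 0.0] = 1.e-6 * np.random.rand(n_zero)  # break the 0.0 ties with tiny noise
assert len(np.unique(y)) == len(y)            # all values are now distinct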
Example #3
def proc(y):
    # p_test (per-variable p-value test result) and b_deb (debug flag) are
    # expected from the enclosing scope.
    n_zero = (y == 0.0).sum()
    #print("{:.4f} / {} / {} / {}".format(np.isnan(y).sum()/len(y), np.nanmin(y), np.nanmax(y), n_zero))
    ## To avoid ties at 0.0 that make the sort step of the Fortran median
    ## computation get stuck in very long loops, add very small fake noise to
    ## all 0.0 values so that the sort performs efficiently.
    y[y == 0.0] = 1.e-6 * np.random.rand(n_zero)
    if not p_test[y.name]:
        if b_deb: print("INFO: p-value > 0.05 for {}".format(y.name))
        return tuple([np.nan] * 4)
    return m.mk_trend(len(y), np.arange(len(y)), y, 2)
Example #4
freq = 365  # samples per year; assumed here from the 365-day seasonal period used below
x = np.arange(freq * 10) / (1. * freq)

y = np.sin(2 * np.pi * x)

slope = 0.2  # [1/year]

# sinus + slope
y += slope * x
# random + slope only
#y = 0.1*np.random.rand(len(x)) + slope*x

#plt.plot(x,y)
#plt.show()

p, z, Sn, nx = m.mk_trend(len(y), np.arange(len(y)), y)
print('p, z, Sn, nx:')
print(p, z, Sn, nx)

slope2, intercept, lo_slope, up_slope = mstats.theilslopes(y)
print('slope2, intercept, lo_slope, up_slope:')
print(slope2, intercept, lo_slope, up_slope)

res_smk = sk.seakeni(y, 365)
print(res_smk)

print('Summary:')
print("mk fortran : {}, err[%] = {:.2f}".format(
    Sn * freq, 100 * (slope - Sn * freq) / slope))
print("mk scipy   : {}, err[%] = {:.2f}".format(
    slope2 * freq, 100 * (slope - slope2 * freq) / slope))
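An additional cross-check that is not in the original script: scipy.stats.kendalltau provides an independent Kendall statistic to compare with the z and p values above.

from scipy import stats

# Kendall's tau between the time index and the values; a significant positive
# tau is consistent with the upward trend injected above.
tau, p_tau = stats.kendalltau(np.arange(len(y)), y)
print('kendalltau: tau = {:.3f}, p = {:.3g}'.format(tau, p_tau))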
Example #5
def processInput_trends(subchunk, parent_iteration, child_iteration):
    """This is the main file that calculate trends"""

    #print('INFO: see pid.<pid>.out to monitor trend computation progress')
    #sys.stdout = open('pid.'+str(os.getpid()) + '.out', 'w')
    
    #print('INFO: see trend.out to monitor trend computation progress')
    #sys.stdout = open('trend.out', 'a')

    ## Debug info: process ID, worker identity and memory usage
    process = psutil.Process(os.getpid())
    current = current_process()
    print(process, current._identity, '{} MB'.format(process.memory_info().rss/1024/1024))

    if subchunk.input=='box':
        print('### Chunk {} > subchunk {} started: COL: [{}:{}] ROW: [{}:{}]'.format(parent_iteration, child_iteration, *subchunk.get_limits('local', 'str')))
        write_string0 = (param.hash + "_CHUNK" + str(parent_iteration)
                         + "_SUBCHUNK" + str(child_iteration)
                         + "_" + '_'.join(subchunk.get_limits('global', 'str'))
                         + '.nc')
        subchunk_fname = param.output_path / write_string0
        ## Check if cache file already exists and must be overwritten
        if not param.b_delete:
            if subchunk_fname.is_file():
                print('INFO: {} already exists. Use -d option to overwrite it.'.format(write_string0))
                return

    elif subchunk.input=='points':
        print('### Chunk {} > subchunk {} started.'.format(parent_iteration, child_iteration))
        print(param.input_file)
        str_date_range = param.input_file.stem.replace('timeseries','')
        write_string0 = 'merged_trends{}.h5'.format(str_date_range) 
        subchunk_fname = param.output_path / write_string0
        ## Result file is always overwritten in the case of point input

    ## Read the input time series file for the main chunk; its shape is
    ## (time, 500, 500) in the current configuration and may vary with other chunk layouts
    hdf_ts = h5py.File(param.input_file, 'r')

    ## Temporary storage sized to the subchunks of the main chunk (currently
    ## 100 x 100 blocks), filled with NaN by default
    var_temp_output = np.empty([*subchunk.dim, 4])
    var_temp_output[:] = np.nan

    
    ## Parameters for the loop
    b_deb = 0  # debug flag: print time profiling
    t00 = timer()
    t000 = timer()
    t_mean = 0.
    #print(current._identity, f'{process.memory_info().rss/1024/1024} Mo')
    offsetx = subchunk.get_limits('local', 'tuple')[0]
    offsety = subchunk.get_limits('local', 'tuple')[2]

    print_freq = 20

    tab_prof_valid = []
    tab_prof_zero = []

    hf = h5py.File(subchunk_fname, 'w')

    for tsvar in hdf_ts['vars'].keys():

        for jj_sub in range(subchunk.dim[0]):
        #for jj_sub in range(61,80): #debug
            # variable dimensions: (time, x, y)
            # preload all data for this row here to avoid the overhead of
            # indexing the HDF5 dataset at each iteration of the inner loop
            data_test0 = hdf_ts['vars/'+tsvar][:,jj_sub+offsety,offsetx:offsetx+subchunk.dim[1]]
            #data_test0 = hdf_ts.variables[tsvar][:500,sub_chunks_x[ii_sub],:]

            for ii_sub in range(subchunk.dim[1]):
            #for ii_sub in range(55,100):
                if b_deb: print('---------------')
                if b_deb: print('jj: {} - ii: {} '.format(jj_sub, ii_sub))
                
                t0 = timer()

                data_test = data_test0[:,ii_sub]

                ## remove tie group
                data_test[1:][np.diff(data_test)==0.] = np.nan
        
                #data_test=hdf_ts.variables[tsvar][:,sub_chunks_x[ii_sub],sub_chunks_y[jj_sub]]
                slope = 999.0  # sentinel; only updated in the debug mstats branch below

                if b_deb:
                    print('t0', timer()-t0)
                    t0 = timer()

                if b_deb: print('Data valid:', data_test.size - np.isnan(data_test).sum(), '/', data_test.size)
                
                if 0:
                    print('Use mstats')
                    data_sen=np.ma.masked_array(data_test, mask=np.isnan(data_test))
                    t0 = timer()
                    slope, intercept, lo_slope, up_slope = mstats.theilslopes(data_sen, alpha=0.1)
                    print('slope, intercept, lo_slope, up_slope:')
                    print(slope, intercept, lo_slope, up_slope)
                    if b_deb: print('t02', timer()-t0)
                    np.savetxt('data_test.dat', data_test.T)
                    sys.exit()
                    t0 = timer()

                # mstats.theilslopes gives the correct slope, consistent with the Python Mann-Kendall Sn score, and is faster than the Fortran version
                # stats.theilslopes gives incorrect values when NaNs are present in the data
                
                if b_deb:
                    print('t2', timer()-t0)
                    t0 = timer()

                if 1:
                    ## original Mann-Kendall test:
                    bla = data_test[~np.isnan(data_test)]
                    if bla.size > 0:
                        #print('min/mean/max/nb/nb_unique', bla.min(), bla.mean(), bla.max(), len(bla), len(np.unique(bla)))
                        #print(bla)
                        if len(np.unique(bla))==1:
                            p,z,Sn,nx = [0,0,0,0] 
                        else:
                            #data_test = data_test[-10:] # debug line to speed up
                            
                            #try:
                            #    p,z,Sn,nx = mk_test_timeout(data_test)
                            #except TimeoutError as e:
                            #    print('timeout!')
                            #    p,z,Sn,nx = [0,0,0,0] 
                            p,z,Sn,nx = m.mk_trend(len(data_test), np.arange(len(data_test)), data_test)
                    else:
                        p,z,Sn,nx = m.mk_trend(len(data_test), np.arange(len(data_test)), data_test)
                        # if data_test is empty, the test returns (p, z, Sn, nx) = (1.0, 0.0, 0.5, 0.0)
                else:
                    ## other test
                    p,z,Sn,nx = [0,0,0,0] 
                    z = data_test.mean()

                if b_deb:
                    t4 = timer()-t0
                    if bla.size>0:
                        if bla.mean()==0.0:
                            tab_prof_zero.append(t4)
                        else:
                            tab_prof_valid.append(t4)
                        
                    print('t4=', t4)
                    if 0:
                        import matplotlib.pyplot as plt
                        plt.clf()
                        plt.plot(bla)
                        plt.ylim(0,6.1)
                        ti1 = '{}/{} - {:.3f} s'.format(jj_sub, ii_sub, t4)
                        ti2 = 'min/mean/max/nb/nb_unique {:.3f} {:.3f} {:.3f} {} {}'.format(bla.min(), bla.mean(), bla.max(), len(bla), len(np.unique(bla)))
                        ti3 = 'slope: {}'.format(Sn)
                        plt.title(ti1+'\n'+ti2+'\n'+ti3)
                        if Sn==0.0:
                            plt.savefig('bla.Sn0.{}.{}.png'.format(jj_sub, ii_sub))
                        else:
                            plt.savefig('bla.{}.{}.png'.format(jj_sub, ii_sub))
                    t0 = timer()

                if b_deb: print('p,z,slope,nx', p,z,slope,nx)
                if b_deb: print('p,z,Sn,nx', p,z,Sn,nx)

                var_temp_output[jj_sub,ii_sub,0] = p
                var_temp_output[jj_sub,ii_sub,1] = z
                var_temp_output[jj_sub,ii_sub,2] = Sn
                var_temp_output[jj_sub,ii_sub,3] = nx

           
            ## Print efficiency stats
            if (jj_sub+1)%print_freq==0:
                elapsed = timer()-t00
                data_stat = hdf_ts['vars/'+tsvar][:,jj_sub+1+offsety-print_freq:jj_sub+1+offsety,offsetx:offsetx+subchunk.dim[1]]
                valid = 100.*(data_stat.size - np.count_nonzero(np.isnan(data_stat)))/data_stat.size
                eff = 1e6*elapsed/data_stat.size
                #print(subchunk.dim, data_test0.shape)
                print('{} : {}.{}.block[{}-{}] : {:.3f}s elapsed : {:.3f} us/pix/date : {:.2f}% valid'.format(datetime.datetime.now(), parent_iteration, child_iteration, jj_sub+1-print_freq, jj_sub+1, elapsed, eff, valid))

                t00 = timer()
                sys.stdout.flush()

            if 0:
                t_mean += timer()-t00
                #print(f't00 {ii_sub} {t_mean/(ii_sub+1)}')
                #print(f't00.p{current._identity[0]}.it{ii_sub} {t_mean/(ii_sub+1):.3f}s {process.memory_info().rss/1024/1024:.2f}Mo')
                print('t00.p{}.it{} {:.3f}s {:.2f}MB'.format(current._identity[0], ii_sub, t_mean/(ii_sub+1), process.memory_info().rss/1024/1024))
                v = var_temp_output[ii_sub,:,:]
                for ii in range(4):
                    #print(f'{np.count_nonzero(np.isnan(v[:,ii]))/v[:,ii].size:.3f}', np.nanmin(v[:,ii]), np.nanmax(v[:,ii]))
                    print('{:.3f}'.format(np.count_nonzero(np.isnan(v[:,ii]))/v[:,ii].size), np.nanmin(v[:,ii]), np.nanmax(v[:,ii]))
                print(np.nanmin(v), np.nanmax(v))
                t00 = timer()
   
        if b_deb:
        #if 1:
            valid = np.array(tab_prof_valid)
            zero = np.array(tab_prof_zero)

            #print('valid:', valid.size, valid.min(), valid.mean(), valid.max())
            #print('zero:', zero.size, zero.min(), zero.mean(), zero.max())
            print(valid.mean())
            print(zero.mean())
            #return
            #sys.exit()

        print('t000tot.p{} {:.3f}s {:.2f}MB'.format(current._identity, timer()-t000, process.memory_info().rss/1024/1024))

        
        hf.create_dataset(tsvar+'/pval', data=var_temp_output[:,:,0])
        hf.create_dataset(tsvar+'/zval', data=var_temp_output[:,:,1])
        hf.create_dataset(tsvar+'/slope', data=var_temp_output[:,:,2])
        hf.create_dataset(tsvar+'/len', data=var_temp_output[:,:,3])

    hf.close() 
    
    print('Subchunk {} completed, saved to {}'.format(child_iteration, subchunk_fname))
    
    return None 
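For reference, a minimal sketch of reading back the four per-variable datasets written above; the file name here is hypothetical, the real one is built from the chunk/subchunk indices.

import h5py

with h5py.File('subchunk_trends.nc', 'r') as hf:  # hypothetical name
    for tsvar in hf.keys():
        pval = hf[tsvar + '/pval'][:]
        slope = hf[tsvar + '/slope'][:]
        print(tsvar, pval.shape, slope.shape)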
Example #6
def mk_test_timeout(data_test):
    return m.mk_trend(len(data_test), np.arange(len(data_test)), data_test)
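Despite its name, this wrapper contains no timeout logic; the commented-out try/except TimeoutError in Example #5 suggests it was meant to be guarded externally. A minimal sketch of one way to do that with the standard signal module (POSIX only; this guard is an assumption, not the original implementation):

import signal

def _raise_timeout(signum, frame):
    raise TimeoutError

def mk_trend_with_timeout(data_test, seconds=5):
    # Arm a SIGALRM that raises TimeoutError if mk_trend runs too long.
    signal.signal(signal.SIGALRM, _raise_timeout)
    signal.alarm(seconds)
    try:
        return mk_test_timeout(data_test)
    finally:
        signal.alarm(0)  # always disarm the alarm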
Example #7
def preproc(y):
    # Second 70% threshold: compute the MK test only if at least 70% of the data are valid
    if (np.count_nonzero(np.isnan(y)) / len(y)) > 0.3:
        if b_deb: print("WARNING: more than 30% of NaNs")
        return tuple([np.nan] * 4)
    return m.mk_trend(len(y), np.arange(len(y)), y, 1)
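A standalone check of the 70% validity rule on toy data (no Fortran module needed):

import numpy as np

y = np.array([1.0, np.nan, 2.0, np.nan, np.nan, 3.0, 4.0, 5.0, np.nan, 6.0])
nan_frac = np.count_nonzero(np.isnan(y)) / len(y)
print('NaN fraction: {:.0%} -> compute MK test: {}'.format(nan_frac, nan_frac <= 0.3))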