def test_detrend_dataframe(self):
    columns = ["one", "two"]
    index = [c for c in "abcde"]
    data = pd.DataFrame(self.data_2d, columns=columns, index=index)

    detrended = tools.detrend(data, order=1, axis=0)
    assert_array_almost_equal(detrended.values, np.zeros_like(data))
    assert_frame_equal(
        detrended,
        pd.DataFrame(detrended.values, columns=columns, index=index),
    )

    detrended = tools.detrend(data, order=0, axis=0)
    assert_array_almost_equal(
        detrended.values, [[-4, -4], [-2, -2], [0, 0], [2, 2], [4, 4]]
    )
    assert_frame_equal(
        detrended,
        pd.DataFrame(detrended.values, columns=columns, index=index),
    )

    detrended = tools.detrend(data, order=0, axis=1)
    assert_array_almost_equal(
        detrended.values,
        [[-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5]],
    )
    assert_frame_equal(
        detrended,
        pd.DataFrame(detrended.values, columns=columns, index=index),
    )
def test_detrend_series(self):
    data = pd.Series(self.data_1d, name="one")

    detrended = tools.detrend(data, order=1)
    assert_array_almost_equal(detrended.values, np.zeros_like(data))
    assert_series_equal(detrended, pd.Series(detrended.values, name="one"))

    detrended = tools.detrend(data, order=0)
    assert_array_almost_equal(detrended.values, pd.Series([-2, -1, 0, 1, 2]))
    assert_series_equal(detrended, pd.Series(detrended.values, name="one"))
def test_detrend_2d(self):
    data = self.data_2d
    assert_array_almost_equal(
        tools.detrend(data, order=1, axis=0), np.zeros_like(data)
    )
    assert_array_almost_equal(
        tools.detrend(data, order=0, axis=0),
        [[-4, -4], [-2, -2], [0, 0], [2, 2], [4, 4]],
    )
    assert_array_almost_equal(
        tools.detrend(data, order=0, axis=1),
        [[-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5]],
    )
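# A minimal sketch of what the fixtures in the tests above imply (the exact
# self.data_1d/self.data_2d values are an assumption reconstructed from the
# expected outputs): order=0 subtracts the mean, order=1 removes the
# least-squares line.
import numpy as np
from statsmodels.tsa.tsatools import detrend

data_1d = np.arange(5.0)                                    # assumed fixture: [0, 1, 2, 3, 4]
data_2d = np.column_stack([data_1d * 2, data_1d * 2 + 1])   # assumed fixture

print(detrend(data_1d, order=0))           # [-2. -1.  0.  1.  2.]
print(detrend(data_1d, order=1))           # ~0 everywhere: the series is exactly linear
print(detrend(data_2d, order=0, axis=1))   # each row minus its mean -> [-0.5, 0.5]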
def _perform_detrending():
    df_res = pd.DataFrame(columns=df.columns)
    for name in df.columns:
        series = df[name].dropna(axis=0)
        res = detrend(series, order)
        df_res[name] = res
    return _stationary(df_res), df_res
def plot_acf(x, nlags, _acf, _pacf, data_dir, png_dir, svg_dir):
    lags_acf = range(len(_acf))
    lags_pacf = range(len(_pacf))

    png_path = png_dir / "acf.png"
    svg_path = svg_dir / "acf.svg"
    plt.figure()
    fig = tpl.plot_acf(x, lags=nlags)
    fig.savefig(png_path)
    fig.savefig(svg_path)

    csv_path = data_dir / "acf.csv"
    df_acf = pd.DataFrame({'lags': lags_acf, 'acf': _acf})
    df_acf.set_index('lags', inplace=True)
    df_acf.to_csv(csv_path)

    png_path = png_dir / "acf_default_lag.png"
    svg_path = svg_dir / "acf_default_lag.svg"
    plt.figure()
    fig = tpl.plot_acf(x)
    fig.savefig(png_path)
    fig.savefig(svg_path)

    png_path = png_dir / "pacf.png"
    svg_path = svg_dir / "pacf.svg"
    plt.figure()
    fig = tpl.plot_pacf(x)
    fig.savefig(png_path)
    fig.savefig(svg_path)

    csv_path = data_dir / "pacf.csv"
    df_pacf = pd.DataFrame({'lags': lags_pacf, 'pacf': _pacf})
    df_pacf.set_index('lags', inplace=True)
    df_pacf.to_csv(csv_path)

    # detrended
    detr_x = detrend(x, order=2)

    png_path = png_dir / "detrend_acf.png"
    svg_path = svg_dir / "detrend_acf.svg"
    plt.figure()
    fig = tpl.plot_acf(detr_x, lags=nlags)
    fig.savefig(png_path)
    fig.savefig(svg_path)

    png_path = png_dir / "detrend_acf_default_lag.png"
    svg_path = svg_dir / "detrend_acf_default_lag.svg"
    plt.figure()
    fig = tpl.plot_acf(detr_x)
    fig.savefig(png_path)
    fig.savefig(svg_path)

    png_path = png_dir / "detrend_pacf.png"
    svg_path = svg_dir / "detrend_pacf.svg"
    plt.figure()
    fig = tpl.plot_pacf(detr_x)
    fig.savefig(png_path)
    fig.savefig(svg_path)
def _detrend(x, method):
    if method == 'diff':
        return np.diff(x)
    if isinstance(method, str):
        method = dict(constant=0, linear=1, quadratic=2, cubic=3)[method]
    if isinstance(method, Number):
        x = detrend(x, method)
    return x
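# A minimal usage sketch for _detrend above, assuming `detrend` is
# statsmodels.tsa.tsatools.detrend and `Number` is numbers.Number.
from numbers import Number

import numpy as np
from statsmodels.tsa.tsatools import detrend

t = np.arange(100.0)
x = 3.0 + 0.5 * t + np.random.randn(100)

_detrend(x, 'diff')      # first difference, length 99
_detrend(x, 'linear')    # same as detrend(x, 1): removes the fitted line
_detrend(x, 2)           # numeric orders are passed straight through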
def analyze_runs(self, subcode):
    print ">>> Analyzing sub{:0>3d}".format(subcode)
    subject = self._fmri_dataset.subject_dir(subcode=subcode)
    for task, directories in subject.dir_tree('functional').iteritems():
        for directory in directories:
            run_name = directory.split('/')[-1]
            maskfile = os.path.join(subject.masks_dir(), run_name, 'gray.nii.gz')
            nonbrain_mask = os.path.join(subject.masks_dir(), run_name, 'non_brain.nii.gz')
            img = nib.load(os.path.join(directory, 'bold_mcf.nii.gz'))
            imgdata = img.get_data()
            maskimg = nib.load(maskfile)
            maskdata = maskimg.get_data()
            nonbrain_maskdata = nib.load(nonbrain_mask).get_data()
            voxmean = np.mean(imgdata, 3)
            voxstd = np.std(imgdata, 3)
            maskvox = np.where(maskdata > 0)
            nonmaskvox = np.where(nonbrain_maskdata > 0)
            voxsfnr = voxmean / voxstd
            meansfnr = np.mean(voxsfnr[maskvox])
            imgsnr = np.zeros(imgdata.shape[3])
            for t in range(imgdata.shape[3]):
                tmp = imgdata[:, :, :, t]
                tmp_brain = tmp[maskvox]
                tmp_nonbrain = tmp[nonmaskvox]
                maskmean = np.mean(tmp_brain)
                imgsnr[t] = maskmean / np.std(tmp_nonbrain)
            task_name, run_number = run_name.split('_')
            print '{} \n {}\nsfnr => {:>40f} \nsnr => {:>40f}'.format(
                self._taskname_mapping[task_name], run_number,
                meansfnr, np.mean(imgsnr))
            continue
            # NOTE: the `continue` above makes the detrending pass below unreachable
            detrended_zscore = np.zeros(imgdata.shape)
            detrended_data = np.zeros(imgdata.shape)
            print imgdata.shape
            for i in range(len(maskvox[0])):
                tmp = imgdata[maskvox[0][i], maskvox[1][i], maskvox[2][i], :]
                tmp_detrended = detrend(tmp)
                detrended_data[maskvox[0][i], maskvox[1][i], maskvox[2][i], :] = tmp_detrended
                detrended_zscore[maskvox[0][i], maskvox[1][i], maskvox[2][i], :] = (
                    tmp_detrended - np.mean(tmp_detrended)
                ) / np.std(tmp_detrended)
            voxmean_detrended = np.mean(detrended_data, 3)
            voxstd_detrended = np.std(detrended_data, 3)
def detrend_ts(ts, norder=3, axis=0):
    """Detrend a time series with the statsmodels detrend function.

    Polynomial trends of orders 0..norder-1 are removed in sequence from
    each (channel, category) slice.
    """
    (n, m, N, c) = ts.shape
    for ichan in range(n):
        for icat in range(c):
            X = ts[ichan, :, :, icat]
            for order in range(norder):
                X = detrend(X, order=order, axis=axis)
            ts[ichan, :, :, icat] = X
    return ts
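# A minimal usage sketch for detrend_ts above, under the assumed 4-D layout
# channels x time x trials x categories (the array sizes are illustrative).
import numpy as np
from statsmodels.tsa.tsatools import detrend

ts = np.random.randn(2, 200, 5, 3)                         # hypothetical data
ts += np.linspace(0, 4, 200)[None, :, None, None]          # add a linear drift in time
clean = detrend_ts(ts.copy(), norder=2, axis=0)            # remove constant, then linear trend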
def data_prep_ml(data_read_simulation, data_read_scanner, imask, nwindows, lwindows):
    stack_sim = None
    stack_scn = None
    noise = None
    stack_sim_flip = None
    stack_scn_flip = None
    noise_flip = None
    for i in range(len(imask)):
        simulation = data_read_simulation.get_data()[
            np.nonzero(imask[i])[0], np.nonzero(imask[i])[1], i, :]
        scanner = data_read_scanner.get_data()[
            np.nonzero(imask[i])[0], np.nonzero(imask[i])[1], i, :]
        for j in range(np.count_nonzero(imask[i])):
            sim = simulation[j, :]
            sim = sim - np.mean(sim)
            scn_orig = scanner[j, :]
            scn = scanner[j, :]
            scn = detrend(scn, 2)
            noise_temp = scn - sim
            sim_flip = np.flip(sim, axis=-1)
            scn_flip = np.flip(scn, axis=-1)
            noise_temp_flip = np.flip(noise_temp, axis=-1)
            if stack_sim is None:
                stack_sim = sim.reshape((nwindows, lwindows))
                stack_scn = scn.reshape((nwindows, lwindows))
                noise = noise_temp.reshape((nwindows, lwindows))
                stack_sim_flip = sim_flip.reshape((nwindows, lwindows))
                stack_scn_flip = scn_flip.reshape((nwindows, lwindows))
                noise_flip = noise_temp_flip.reshape((nwindows, lwindows))
            else:
                stack_sim = np.vstack((stack_sim, sim.reshape((nwindows, lwindows))))
                stack_scn = np.vstack((stack_scn, scn.reshape((nwindows, lwindows))))
                noise = np.vstack((noise, noise_temp.reshape((nwindows, lwindows))))
                stack_sim_flip = np.vstack((stack_sim_flip, sim_flip.reshape((nwindows, lwindows))))
                stack_scn_flip = np.vstack((stack_scn_flip, scn_flip.reshape((nwindows, lwindows))))
                noise_flip = np.vstack((noise_flip, noise_temp_flip.reshape((nwindows, lwindows))))
    return stack_scn, stack_sim, noise, stack_scn_flip, stack_sim_flip, noise_flip
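# A small sketch of the windowing used above: each voxel time series of
# length nwindows * lwindows is detrended (order 2) and cut into nwindows
# rows of lwindows samples. The names and sizes here are illustrative only.
import numpy as np
from statsmodels.tsa.tsatools import detrend

nwindows, lwindows = 10, 64
series = np.random.randn(nwindows * lwindows).cumsum()   # hypothetical drifting signal
windows = detrend(series, 2).reshape((nwindows, lwindows))
assert windows.shape == (10, 64)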
def analyze_runs(self, subcode):
    print ">>> Analyzing sub{:0>3d}".format(subcode)
    subject = self._fmri_dataset.subject_dir(subcode=subcode)
    for task, directories in subject.dir_tree('functional').iteritems():
        for directory in directories:
            run_name = directory.split('/')[-1]
            maskfile = os.path.join(subject.masks_dir(),run_name,'grey.nii.gz')
            nonbrain_mask = os.path.join(subject.masks_dir(),run_name,'non_brain.nii.gz')
            img = nib.load(os.path.join(directory,'bold_mcf.nii.gz'))
            imgdata = img.get_data()
            maskimg=nib.load(maskfile)
            maskdata=maskimg.get_data()
            nonbrain_maskdata = nib.load(nonbrain_mask).get_data()
            voxmean=np.mean(imgdata,3)
            voxstd=np.std(imgdata,3)
            maskvox=np.where(maskdata>0)
            nonmaskvox=np.where(nonbrain_maskdata>0)
            voxsfnr=voxmean/voxstd
            meansfnr=np.mean(voxsfnr[maskvox])
            imgsnr=np.zeros(imgdata.shape[3])
            for t in range(imgdata.shape[3]):
                tmp=imgdata[:,:,:,t]
                tmp_brain=tmp[maskvox]
                tmp_nonbrain=tmp[nonmaskvox]
                maskmean=np.mean(tmp_brain)
                imgsnr[t]=maskmean/np.std(tmp_nonbrain)
            task_name, run_number = run_name.split('_')
            print '{} \n {}\nsfnr => {:>40f} \nsnr => {:>40f}'.format(self._taskname_mapping[task_name],run_number,meansfnr,np.mean(imgsnr))
            continue
            detrended_zscore=np.zeros(imgdata.shape)
            detrended_data=np.zeros(imgdata.shape)
            print imgdata.shape
            for i in range(len(maskvox[0])):
                tmp=imgdata[maskvox[0][i],maskvox[1][i],maskvox[2][i],:]
                tmp_detrended=detrend(tmp)
                detrended_data[maskvox[0][i],maskvox[1][i],maskvox[2][i],:]=tmp_detrended
                detrended_zscore[maskvox[0][i],maskvox[1][i],maskvox[2][i],:]=(tmp_detrended - np.mean(tmp_detrended))/np.std(tmp_detrended)
            voxmean_detrended=np.mean(detrended_data,3)
            voxstd_detrended=np.std(detrended_data,3)
def find_frequency(x):
    """Estimate the dominant period of a series from its spectral density.

    TODO: fit an autoregressive model to the residuals and use the fitted
    values instead.
    """
    ts_vals = np.array(x.to_numpy())
    n = len(x)
    detrended_series = detrend(ts_vals)
    slope, intercept, _, _, _ = linregress(range(len(detrended_series)), detrended_series)
    residuals = np.array([
        detrended_series[idx] - (slope * idx + intercept)
        for idx, val in enumerate(detrended_series)
    ])
    f, spec = spectral_density(residuals)
    if max(spec) > 3:
        max_freq_idx = np.argmax(spec)
        period = np.floor((1 / f[max_freq_idx]) + 0.5)
        if period == np.inf:
            diff = [spec[i] - spec[i - 1] for i in range(1, len(spec))]
            j = [idx for idx, val in enumerate(diff) if val > 0]
            if len(j):
                next_max = j[0] + np.argmax(spec[j[0] + 1:])
                if next_max < len(f):
                    period = np.floor((1 / f[next_max]) + 0.5)
                else:
                    period = 1.0
            else:
                period = 1.0
    else:
        period = 1.0
    return period
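# A rough self-contained check of the idea in find_frequency, using
# scipy.signal.periodogram as a stand-in for the spectral_density helper
# (which is not defined in this snippet). Purely illustrative.
import numpy as np
import pandas as pd
from scipy.signal import periodogram

t = np.arange(240)
x = pd.Series(np.sin(2 * np.pi * t / 12) + 0.05 * t)   # period-12 cycle plus trend

f, spec = periodogram(np.asarray(x) - np.polyval(np.polyfit(t, x, 1), t))
print(np.floor(1 / f[np.argmax(spec)] + 0.5))          # ~12.0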
def amplitude(x, y, period=24, tol=0.5, nout=10000):
    r"""Extract the amplitude from a time series using the Lomb-Scargle method.

    Args:
    :param x: time component
    :param y: the wave at each time-step
    :param period: the period of one oscillation
    :param tol: how far above and below the period to compute the
        periodogram; default = 0.5 * period
    :param nout: number of periods to output
    :returns: the periods, the associated amplitude for each period, the
        index of the max amplitude, and the max amplitude.
    """
    y = detrend(y)
    tolerance = period * tol
    periods = np.linspace(period - tolerance, period + tolerance, nout)
    freqs = 1 / periods
    angular_freqs = 2 * np.pi * freqs
    pgram = signal.lombscargle(x, y, angular_freqs)
    normalized_pgram = np.sqrt(4 * (pgram / len(y)))
    index = np.argmax(normalized_pgram)
    return periods, normalized_pgram, index, normalized_pgram[index]
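# A quick sanity check for amplitude above: a pure 24-hour sine of amplitude 2
# sampled hourly should peak near period 24 with a recovered amplitude near 2.
import numpy as np
from scipy import signal
from statsmodels.tsa.tsatools import detrend

x = np.arange(0.0, 24 * 10)                 # ten days of hourly samples
y = 2.0 * np.sin(2 * np.pi * x / 24.0)
periods, amps, idx, peak = amplitude(x, y)
print(periods[idx], peak)                   # roughly 24.0 and 2.0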
def fBIRN_SFNR(imgdata, maskfile=None, verbose=False, plot_data=True):
    # (1) Preliminaries
    # flag for storing sfnr as nii image
    #save_sfnr=False
    # get number of slices
    nslices = imgdata.shape[2]
    # get number of dynamics/voxels/volumes
    ndynamics = imgdata.shape[3]

    # (2) Generate on-the-fly mask binary image (if needed)
    # compute the mask image
    if maskfile == None:
        # prior to computing the mask, compute the mean of the whole dynamics to get a single 3D blob
        voxmean = numpy.mean(imgdata, 3)
        # compute thresholding using the Otsu method, then get a mask and a non-mask matrix object
        maskThreshold = skimage.filters.threshold_otsu(voxmean)
        maskdata = numpy.where(voxmean > maskThreshold, 1, 0)
        nonmaskdata = numpy.where(voxmean > maskThreshold, 0, 1)
        # remove black holes
        #structElem = skimage.morphology.ball(2)
        #maskdata=skimage.morphology.closing(maskdata, structElem)
        #maskdata=skimage.morphology.erosion(maskdata, skimage.morphology.ball(1))
        #nonmaskdata=skimage.morphology.closing(nonmaskdata, structElem)
        #nonmaskdata=skimage.morphology.dilate(nonmaskdata, structElem)
        maskdata = skimage.morphology.closing(maskdata)
        nonmaskdata = skimage.morphology.closing(nonmaskdata)
        maskvox = numpy.where(maskdata > 0)
        nonmaskvox = numpy.where(maskdata == 0)
        if verbose:
            print '[info] nmaskvox: %i' % len(maskvox[0])
            print '[info] nnonmaskvox: %i' % len(nonmaskvox[0])
    else:
        maskimg = nibabel.load(maskfile)
        maskdata = maskimg.get_data()
        nonmaskdata = numpy.where(maskdata == 1, 0, 1)
        maskvox = numpy.where(maskdata > 0)
        nonmaskvox = numpy.where(maskdata == 0)
        if verbose:
            print '[info] nmaskvox: %i' % len(maskvox[0])
            print '[info] nnonmaskvox: %i' % len(nonmaskvox[0])

    # ToDo: EXCLUDED by now
    # load motion parameters and compute FD and identify bad vols for
    # potential scrubbing (ala Power et al.)
    #motpars=numpy.loadtxt(motfile)
    #fd=compute_fd(motpars)
    #numpy.savetxt(os.path.join(qadir,'fd.txt'),fd)

    # (3) Compute std, mean and coefficient of variation values
    # compute the voxel means (across all dynamics; a mean volume over the array of 3D elements)
    voxmean = numpy.mean(imgdata, 3)
    # compute the voxel standard deviation (std) across all dynamics, calculated over equally
    # positioned pixels in the array of 3D elements
    # std :: variation or dispersion of a set (the spread of a distribution measurement)
    voxstd = numpy.std(imgdata, 3)
    # compute the voxel coefficient of variation
    # cv :: standardized measure of dispersion of a probability distribution (extent of
    # variability in relation to the mean of the population)
    # computing the coefficient of variation may raise
    # RuntimeWarning: invalid value encountered in divide (NaN)
    voxcv = voxstd / numpy.abs(voxmean)
    # replace the NaN values in the 3D object with zeros
    voxcv[numpy.isnan(voxcv)] = 0
    # clip the larger values in the 3D object to 1
    voxcv[voxcv > 1] = 1

    # (4) compute timepoint statistics
    # create arrays for storing per-dynamic statistics for mean, median, MAD, CV and SNR
    # (after applying the mask)
    maskmedian = numpy.zeros(ndynamics)
    maskmean = numpy.zeros(ndynamics)
    maskmad = numpy.zeros(ndynamics)
    maskcv = numpy.zeros(ndynamics)
    imgsnr = numpy.zeros(ndynamics)
    # loop over dynamics to compute statistical values and store them in the arrays for plotting
    for t in range(ndynamics):
        tmp = imgdata[:, :, :, t]
        tmp_brain = tmp[maskvox]
        tmp_nonbrain = tmp[nonmaskvox]
        maskmad[t] = MAD.MAD(tmp_brain)
        maskmedian[t] = numpy.median(tmp_brain)
        maskmean[t] = numpy.mean(tmp_brain)
        maskcv[t] = maskmad[t] / maskmedian[t]
        # define SNR as mean(masked(Image))/std(noise)
        # signal :: mean pixel intensity value in a region of interest (ROI)
        # noise :: standard deviation in pixel intensity in background air (free of ghosting artefacts)
        # SNR is calculated using SNR = 0.655 x S / s. The 0.655 factor is due to the Rician
        # distribution of the background noise in a magnitude image (tending to a Rayleigh
        # distribution as the SNR goes to zero), which arises because noise variations, which can
        # be negative and positive, are all made positive, which artificially reduces s a bit.
        imgsnr[t] = maskmean[t] / numpy.std(tmp_nonbrain)

    # spike detection (Greve et al./fBIRN)
    # 1. Remove mean and temporal trend from each voxel.
    # 2. Compute temporal Z-score for each voxel.
    # 3. Average the absolute Z-score (AAZ) within each slice and time point separately.
    #    This gives a matrix with number of rows equal to the number of slices (nSlices)
    #    and number of columns equal to the number of time points (nFrames).
    # 4. Compute new Z-scores using a jackknife across the slices (JKZ). For a given time point,
    #    remove one of the slices, compute the average and standard deviation of the AAZ across
    #    the remaining slices. Use these two numbers to compute a Z for the slice left out
    #    (this is the JKZ). The final Spike Measure is the absolute value of the JKZ (AJKZ).
    #    Repeat for all slices. This gives a new nSlices-by-nFrames matrix (see Figure 8).
    #    This procedure tends to remove components that are common across slices and so rejects motion.
    if verbose:
        print '[info] computing spike stats...'
    detrended_zscore = numpy.zeros(imgdata.shape)
    detrended_data = numpy.zeros(imgdata.shape)
    for i in range(len(maskvox[0])):
        tmp = imgdata[maskvox[0][i], maskvox[1][i], maskvox[2][i], :]
        tmp_detrended = detrend(tmp)
        detrended_data[maskvox[0][i], maskvox[1][i], maskvox[2][i], :] = tmp_detrended
        detrended_zscore[maskvox[0][i], maskvox[1][i], maskvox[2][i], :] = (
            tmp_detrended - numpy.mean(tmp_detrended)) / numpy.std(tmp_detrended)
    loo = sklearn.cross_validation.LeaveOneOut(nslices)
    AAZ = numpy.zeros((nslices, ndynamics))
    for s in range(nslices):
        for t in range(ndynamics):
            AAZ[s, t] = numpy.mean(numpy.abs(detrended_zscore[:, :, s, t]))
    JKZ = numpy.zeros((nslices, ndynamics))
    if verbose:
        print '[info] computing outliers...'
    for train, test in loo:
        for tp in range(ndynamics):
            train_mean = numpy.mean(AAZ[train, tp])
            train_std = numpy.std(AAZ[train, tp])
            JKZ[test, tp] = (AAZ[test, tp] - train_mean) / train_std
    AJKZ = numpy.abs(JKZ)
    spikes = []
    if numpy.max(AJKZ) > AJKZ_thresh:
        if verbose:
            print '[warning] Possible spike: Max AJKZ = %f' % numpy.max(AJKZ)
        spikes = numpy.where(numpy.max(AJKZ, 0) > AJKZ_thresh)[0]
    #if len(spikes)>0:
    #    numpy.savetxt(os.path.join(qadir,'spikes.txt'),spikes)
    voxmean_detrended = numpy.mean(detrended_data, 3)
    voxstd_detrended = numpy.std(detrended_data, 3)
    # compute per-voxel SFNR
    voxsfnr = voxmean / voxstd
    # compute mean SFNR
    meansfnr = numpy.mean(voxsfnr[maskvox])

    # create plots
    datavars = {'imgsnr': imgsnr, 'meansfnr': meansfnr, 'spikes': spikes}  #,'badvols':badvols_expanded_index}
    if plot_data:
        print '[info] Plotting data to image files!'
        #trend=plot_timeseries(maskmean,'Mean signal (unfiltered)',os.path.join(qadir,'maskmean.png'),
        #                      plottrend=True,ylabel='Mean MR signal')
        #datavars['trend']=trend
        #plot_timeseries(maskmad,'Median absolute deviation (robust SD)',
        #                os.path.join(qadir,'mad.png'),ylabel='MAD')
        # [DEBUG] store SNR for comparing with precomputed mask (BET)
        #plot_timeseries(imgsnr,'SNR',
        #                os.path.join(qadir,'snr.png'),plottrend=True,ylabel='SNR')
        # ToDo: EXCLUDED
        #plot_timeseries(DVARS,'DVARS (root mean squared signal derivative over brain mask)',
        #                os.path.join(qadir,'DVARS.png'),plotline=0.5,ylabel='DVARS')
        # ToDo: EXCLUDED
        #plot_timeseries(fd,'Framewise displacement',os.path.join(qadir,'fd.png'),
        #                badvols_expanded_index,'Timepoints to scrub (%d total)'%len(badvols),
        #                plotline=0.5,ylims=[0,1],ylabel='FD')
        # Compute the power spectral density
        # (i.e. how the power of a signal is distributed over the different frequencies)
        #psd=matplotlib.mlab.psd(maskmean,NFFT=128,noverlap=96,Fs=1/TR)
        #plt.clf()
        #fig=plt.figure(figsize=[10,3])
        #fig.subplots_adjust(bottom=0.15)
        #plt.plot(psd[1][2:],numpy.log(psd[0][2:]))
        #plt.title('Log power spectrum of mean signal across mask')
        #plt.xlabel('frequency (secs)')
        #plt.ylabel('log power')
        #plt.savefig(os.path.join(qadir,'meanpsd.png'),bbox_inches='tight')
        #plt.close()
        #plt.clf()
        #plt.imshow(AJKZ,vmin=0,vmax=AJKZ_thresh)
        #plt.xlabel('timepoints')
        #plt.ylabel('slices')
        #plt.title('Spike measure (absolute jackknife Z)')
        #plt.savefig(os.path.join(qadir,'spike.png'),bbox_inches='tight')
        #plt.close()
        #if img.shape[0]<img.shape[1] and img.shape[0]<img.shape[2]:
        #    orientation='sagittal'
        #else:
        #    orientation='axial'
        #mk_slice_mosaic(voxmean,os.path.join(qadir,'voxmean.png'),'Image mean (with mask)',contourdata=maskdata)
        #mk_slice_mosaic(voxcv,os.path.join(qadir,'voxcv.png'),'Image coefficient of variation')
        #mk_slice_mosaic(voxsfnr,os.path.join(qadir,'voxsfnr.png'),'Image SFNR')
        # WATCH OUT: PDF report creator excluded
        #mk_report(infile,qadir,datavars)
        # WATCH OUT: excluded as it is not being used!
        # def save_vars(infile,qadir,datavars):
        # ToDo: save values in a csv file!?
        #datafile=os.path.join(qadir,'qadata.csv')
        #f=open(datafile,'w')
        #f.write('SNR,%f\n'%numpy.mean(datavars['imgsnr']))
        #f.write('SFNR,%f\n'%datavars['meansfnr'])
        #f.write('drift,%f\n'%datavars['trend'].params[1])
        #f.write('nspikes,%d\n'%len(datavars['spikes']))
        #f.write('nscrub,%d\n'%len(datavars['badvols']))
        #f.close()
        if len(datavars['spikes']) > 0:
            try:
                # create a temporary working directory
                tmpLocation = tempfile.mkdtemp()
                # create a plot of the spike measures
                plt.clf()
                plt.imshow(AJKZ, vmin=0, vmax=AJKZ_thresh)
                plt.xlabel('timepoints')
                plt.ylabel('slices')
                plt.title('Spike measure (absolute jackknife Z)')
                plt.savefig(os.path.join(tmpLocation, 'spikeStats.png'), bbox_inches='tight')
                plt.close()
            except Exception as e:
                if args['verbose']:
                    print ' [warning] Cannot create spike stats plot.\r\n Reason:: %s' % e
            else:
                if thumbnail is not None:
                    if args['verbose']:
                        print ' [Info] Spike stats plot generated'
            finally:
                # always delete the temporary directory
                if os.path.exists(tmpLocation):
                    shutil.rmtree(tmpLocation)
    if verbose:
        print '[info] SNR, %f' % numpy.mean(datavars['imgsnr'])
        print '[info] SFNR, %f' % datavars['meansfnr']
        #print '[info] drift, %f'%datavars['trend'].params[1]
        if len(datavars['spikes']) > 0:
            print '[info] nspikes,%d' % len(datavars['spikes'])
    #if save_sfnr:
    #    sfnrimg=nibabel.Nifti1Image(voxsfnr,img.get_affine())
    #    sfnrimg.to_filename(os.path.join(qadir,'voxsfnr.nii.gz'))
    return datavars
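# A minimal usage sketch for fBIRN_SFNR above, assuming a 4-D functional image
# on disk (the filename is illustrative); get_data() matches the older nibabel
# API used throughout these snippets.
import nibabel
img = nibabel.load('bold_mcf.nii.gz')
qa = fBIRN_SFNR(img.get_data(), maskfile=None, verbose=True, plot_data=False)
print '[info] mean SFNR: %f, candidate spikes: %i' % (qa['meansfnr'], len(qa['spikes']))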
def fmriqa(infile, TR, outdir=None, maskfile=None, motfile=None, verbose=False, plot_data=True):
    save_sfnr = True
    if os.path.dirname(infile) == '':
        basedir = os.getcwd()
        infile = os.path.join(basedir, infile)
    elif os.path.dirname(infile) == '.':
        basedir = os.getcwd()
        infile = os.path.join(basedir, infile.replace('./', ''))
    else:
        basedir = os.path.dirname(infile)
    if outdir == None:
        outdir = basedir
    qadir = os.path.join(outdir, 'QA')

    if not infile.find('mcf.nii.gz') > 0:
        error_and_exit('infile must be of form XXX_mcf.nii.gz')
    if not os.path.exists(infile):
        error_and_exit('%s does not exist!' % infile)
    if maskfile == None:
        maskfile = infile.replace('mcf.nii', 'mcf_brain_mask.nii')
    if not os.path.exists(maskfile):
        error_and_exit('%s does not exist!' % maskfile)
    if motfile == None:
        motfile = infile.replace('mcf.nii.gz', 'mcf.par')
    if not os.path.exists(motfile):
        error_and_exit('%s does not exist!' % motfile)
    if not os.path.exists(qadir):
        os.mkdir(qadir)
    else:
        print 'QA dir already exists - overwriting!'

    if verbose:
        print 'infile:', infile
        print 'maskfile:', maskfile
        print 'motfile:', motfile
        print 'outdir:', outdir
        print 'computing image stats'
    img = nib.load(infile)
    imgdata = img.get_data()
    nslices = imgdata.shape[2]
    ntp = imgdata.shape[3]
    maskimg = nib.load(maskfile)
    maskdata = maskimg.get_data()
    maskvox = N.where(maskdata > 0)
    nonmaskvox = N.where(maskdata == 0)
    if verbose:
        print 'nmaskvox:', len(maskvox[0])

    # load motion parameters and compute FD and identify bad vols for
    # potential scrubbing (ala Power et al.)
    motpars = N.loadtxt(motfile)
    fd = compute_fd(motpars)
    N.savetxt(os.path.join(qadir, 'fd.txt'), fd)

    voxmean = N.mean(imgdata, 3)
    voxstd = N.std(imgdata, 3)
    voxcv = voxstd / N.abs(voxmean)
    voxcv[N.isnan(voxcv)] = 0
    voxcv[voxcv > 1] = 1

    # compute timepoint statistics
    maskmedian = N.zeros(imgdata.shape[3])
    maskmean = N.zeros(imgdata.shape[3])
    maskmad = N.zeros(imgdata.shape[3])
    maskcv = N.zeros(imgdata.shape[3])
    imgsnr = N.zeros(imgdata.shape[3])
    for t in range(imgdata.shape[3]):
        tmp = imgdata[:, :, :, t]
        tmp_brain = tmp[maskvox]
        tmp_nonbrain = tmp[nonmaskvox]
        maskmad[t] = MAD(tmp_brain)
        maskmedian[t] = N.median(tmp_brain)
        maskmean[t] = N.mean(tmp_brain)
        maskcv[t] = maskmad[t] / maskmedian[t]
        imgsnr[t] = maskmean[t] / N.std(tmp_nonbrain)

    # perform Greve et al./fBIRN spike detection
    # 1. Remove mean and temporal trend from each voxel.
    # 2. Compute temporal Z-score for each voxel.
    # 3. Average the absolute Z-score (AAZ) within each slice and time point separately.
    #    This gives a matrix with number of rows equal to the number of slices (nSlices)
    #    and number of columns equal to the number of time points (nFrames).
    # 4. Compute new Z-scores using a jackknife across the slices (JKZ). For a given time point,
    #    remove one of the slices, compute the average and standard deviation of the AAZ across
    #    the remaining slices. Use these two numbers to compute a Z for the slice left out
    #    (this is the JKZ). The final Spike Measure is the absolute value of the JKZ (AJKZ).
    #    Repeat for all slices. This gives a new nSlices-by-nFrames matrix (see Figure 8).
    #    This procedure tends to remove components that are common across slices and so rejects motion.
    if verbose:
        print 'computing spike stats'
    detrended_zscore = N.zeros(imgdata.shape)
    detrended_data = N.zeros(imgdata.shape)
    for i in range(len(maskvox[0])):
        tmp = imgdata[maskvox[0][i], maskvox[1][i], maskvox[2][i], :]
        tmp_detrended = detrend(tmp)
        detrended_data[maskvox[0][i], maskvox[1][i], maskvox[2][i], :] = tmp_detrended
        detrended_zscore[maskvox[0][i], maskvox[1][i], maskvox[2][i], :] = (
            tmp_detrended - N.mean(tmp_detrended)) / N.std(tmp_detrended)
    loo = sklearn.cross_validation.LeaveOneOut(nslices)
    AAZ = N.zeros((nslices, ntp))
    for s in range(nslices):
        for t in range(ntp):
            AAZ[s, t] = N.mean(N.abs(detrended_zscore[:, :, s, t]))
    JKZ = N.zeros((nslices, ntp))
    if verbose:
        print 'computing outliers'
    for train, test in loo:
        for tp in range(ntp):
            train_mean = N.mean(AAZ[train, tp])
            train_std = N.std(AAZ[train, tp])
            JKZ[test, tp] = (AAZ[test, tp] - train_mean) / train_std
    AJKZ = N.abs(JKZ)
    spikes = []
    if N.max(AJKZ) > AJKZ_thresh:
        print 'Possible spike: Max AJKZ = %f' % N.max(AJKZ)
        spikes = N.where(N.max(AJKZ, 0) > AJKZ_thresh)[0]
    if len(spikes) > 0:
        N.savetxt(os.path.join(qadir, 'spikes.txt'), spikes)
    voxmean_detrended = N.mean(detrended_data, 3)
    voxstd_detrended = N.std(detrended_data, 3)
    voxsfnr = voxmean / voxstd
    meansfnr = N.mean(voxsfnr[maskvox])

    # create plots
    #imgdata_flat=imgdata.reshape(N.prod(imgdata.shape))
    #imgdata_nonzero=imgdata_flat[imgdata_flat>0.0]
    scaledmean = (maskmean - N.mean(maskmean)) / N.std(maskmean)
    mean_running_diff = N.zeros(maskmad.shape)
    mean_running_diff = (maskmean[1:] - maskmean[:-1]) / ((maskmean[1:] + maskmean[:-1]) / 2.0)
    DVARS = N.zeros(fd.shape)
    DVARS[1:] = N.sqrt(mean_running_diff**2) * 100.0
    N.savetxt(os.path.join(qadir, 'dvars.txt'), DVARS)

    # bad volumes are those where both mean FD > 0.5 mm AND DVARS > 0.5
    badvol_index_orig = N.where((fd > FDthresh) * (DVARS > DVARSthresh))[0]
    print badvol_index_orig
    print len(badvol_index_orig)
    badvols = N.zeros(len(DVARS))
    badvols[badvol_index_orig] = 1
    badvols_expanded = badvols.copy()
    for i in badvol_index_orig:
        if i > (nback - 1):
            start = i - nback
        else:
            start = 0
        if i < (len(badvols) - nforward):
            end = i + nforward + 1
        else:
            end = len(badvols)
        #print i,start,end
        badvols_expanded[start:end] = 1
    badvols_expanded_index = N.where(badvols_expanded > 0)[0]
    #print badvols_expanded_index
    if len(badvols_expanded_index) > 0:
        N.savetxt(os.path.join(qadir, 'scrubvols.txt'), badvols_expanded_index, fmt='%d')
        # make scrubbing design matrix - one column per scrubbed timepoint
        scrubdes = N.zeros((len(DVARS), len(badvols_expanded_index)))
        for i in range(len(badvols_expanded_index)):
            scrubdes[badvols_expanded_index[i], i] = 1
        N.savetxt(os.path.join(qadir, 'scrubdes.txt'), scrubdes, fmt='%d')
    else:
        scrubdes = []

    # save out complete confound file
    confound_mtx = N.zeros((len(DVARS), 14))
    confound_mtx[:, 0:6] = motpars
    confound_mtx[1:, 6:12] = motpars[:-1, :] - motpars[1:, :]  # derivs
    confound_mtx[:, 12] = fd
    confound_mtx[:, 13] = DVARS
    if not scrubdes == []:
        confound_mtx = N.hstack((confound_mtx, scrubdes))
    N.savetxt(os.path.join(qadir, 'confound.txt'), confound_mtx)

    #plot_timeseries(scaledmean,'Mean in-mask signal (Z-scored)',
    #                os.path.join(qadir,'scaledmaskmean.png'),spikes,'Potential spikes')
    datavars = {
        'imgsnr': imgsnr,
        'meansfnr': meansfnr,
        'spikes': spikes,
        'badvols': badvols_expanded_index
    }
    if plot_data:
        print 'before plot'
        trend = plot_timeseries(maskmean, 'Mean signal (unfiltered)',
                                os.path.join(qadir, 'maskmean.png'),
                                plottrend=True, ylabel='Mean MR signal')
        print 'after plot'
        datavars['trend'] = trend
        plot_timeseries(maskmad, 'Median absolute deviation (robust SD)',
                        os.path.join(qadir, 'mad.png'), ylabel='MAD')
        plot_timeseries(DVARS, 'DVARS (root mean squared signal derivative over brain mask)',
                        os.path.join(qadir, 'DVARS.png'), plotline=0.5, ylabel='DVARS')
        plot_timeseries(fd, 'Framewise displacement', os.path.join(qadir, 'fd.png'),
                        badvols_expanded_index,
                        'Timepoints to scrub (%d total)' % len(badvols),
                        plotline=0.5, ylims=[0, 1], ylabel='FD')
        psd = matplotlib.mlab.psd(maskmean, NFFT=128, noverlap=96, Fs=1 / TR)
        plt.clf()
        fig = plt.figure(figsize=[10, 3])
        fig.subplots_adjust(bottom=0.15)
        plt.plot(psd[1][2:], N.log(psd[0][2:]))
        plt.title('Log power spectrum of mean signal across mask')
        plt.xlabel('frequency (secs)')
        plt.ylabel('log power')
        plt.savefig(os.path.join(qadir, 'meanpsd.png'), bbox_inches='tight')
        plt.close()
        plt.clf()
        plt.imshow(AJKZ, vmin=0, vmax=AJKZ_thresh)
        plt.xlabel('timepoints')
        plt.ylabel('slices')
        plt.title('Spike measure (absolute jackknife Z)')
        plt.savefig(os.path.join(qadir, 'spike.png'), bbox_inches='tight')
        plt.close()
        if img.shape[0] < img.shape[1] and img.shape[0] < img.shape[2]:
            orientation = 'sagittal'
        else:
            orientation = 'axial'
        mk_slice_mosaic(voxmean, os.path.join(qadir, 'voxmean.png'),
                        'Image mean (with mask)', contourdata=maskdata)
        mk_slice_mosaic(voxcv, os.path.join(qadir, 'voxcv.png'), 'Image CV')
        mk_slice_mosaic(voxsfnr, os.path.join(qadir, 'voxsfnr.png'), 'Image SFNR')
        mk_report(infile, qadir, datavars)

    # def save_vars(infile,qadir,datavars):
    datafile = os.path.join(qadir, 'qadata.csv')
    f = open(datafile, 'w')
    f.write('SNR,%f\n' % N.mean(datavars['imgsnr']))
    f.write('SFNR,%f\n' % datavars['meansfnr'])
    #f.write('drift,%f\n'%datavars['trend'].params[1])
    f.write('nspikes,%d\n' % len(datavars['spikes']))
    f.write('nscrub,%d\n' % len(datavars['badvols']))
    f.close()

    if save_sfnr:
        sfnrimg = nib.Nifti1Image(voxsfnr, img.get_affine())
        sfnrimg.to_filename(os.path.join(qadir, 'voxsfnr.nii.gz'))

    return qadir
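# A compact, self-contained sketch of the jackknife-Z spike measure described
# in the comments above, on synthetic data and using the modern
# sklearn.model_selection API (sklearn.cross_validation is the old location
# used in these snippets).
import numpy as np
from sklearn.model_selection import LeaveOneOut

nslices, ntp = 30, 100
AAZ = np.random.rand(nslices, ntp)       # stand-in for slice-wise mean |z|
AAZ[12, 40] = 5.0                        # inject a fake spike
JKZ = np.zeros_like(AAZ)
for train, test in LeaveOneOut().split(AAZ):
    JKZ[test, :] = (AAZ[test, :] - AAZ[train, :].mean(axis=0)) / AAZ[train, :].std(axis=0)
AJKZ = np.abs(JKZ)
print(np.unravel_index(np.argmax(AJKZ), AJKZ.shape))  # ~(12, 40)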
def test_detrend_1d(self):
    data = self.data_1d
    assert_array_almost_equal(tools.detrend(data, order=1), np.zeros_like(data))
    assert_array_almost_equal(tools.detrend(data, order=0), [-2, -1, 0, 1, 2])
global dttname, errname
dttname = "M0"
errname = "EM0"
alldf[dttname] *= -100
alldf[errname] *= -100
ALL = alldf[alldf['Pairs'] == 'ALL'].copy()
allbut = alldf[alldf['Pairs'] != 'ALL'].copy()
py1_wmean, py1_wstd = get_wavgwstd(allbut)
py1_wmean = py1_wmean.resample('D', how='mean')
py1_wstd = py1_wstd.resample('D', how='mean').fillna(0.0)
ALL[dttname] = detrend(ALL[dttname])
data = detrend(py1_wmean)
plt.subplot(gs[i])
plt.plot(ALL.index, ALL[dttname], c='r', label='ALL: $\delta v/v$ of the mean network')
# ALL.to_csv('all.txt')
# foruv01 = alldf[alldf['Pairs'] == "YA_FOR_YA_UV05"]
# plt.plot(foruv01.index, foruv01['M'], c='b', label='foruv11')
plt.fill_between(ALL.index, ALL[dttname] - ALL[errname], ALL[dttname] + ALL[errname],
                 lw=1, color='red', zorder=-1, alpha=0.3)
plt.plot(py1_wmean.index, data, c='g', lw=1, zorder=11,
         label='Weighted mean of $\delta v/v$ of individual pairs')
plt.fill_between(py1_wmean.index, data + py1_wstd, data - py1_wstd,
                 color='g', lw=1, zorder=-1, alpha=0.3)
plt.ylabel('$\delta v/v$ in %')
plt.ylim(0.2, -0.2)
# eruptions()
def fmriqa(infile, TR, outdir, maskfile, motfile, verbose=True, plot_data=True):
    from statsmodels.tsa.tsatools import detrend
    import ctypes, sys, os
    flags = sys.getdlopenflags()
    sys.setdlopenflags(flags | ctypes.RTLD_GLOBAL)
    import numpy as N
    import nibabel as nib
    from compute_fd import *
    #from statsmodels.tsa.tsatools import detrend
    import statsmodels.api
    import matplotlib
    import numpy as np
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import sklearn.cross_validation
    from matplotlib.backends.backend_pdf import PdfPages
    from mk_slice_mosaic import *
    from matplotlib.mlab import psd
    from plot_timeseries import plot_timeseries
    from MAD import MAD
    from mk_report import mk_report
    from fmriqa import error_and_exit
    sys.setdlopenflags(flags)

    # thresholds for scrubbing and spike detection
    FDthresh = 0.5  # for a volume to be scrubbed, both FD and DVARS need to be above 0.5
    DVARSthresh = 0.5
    AJKZ_thresh = 25
    # number of timepoints forward and back to scrub
    nback = 1
    nforward = 2

    ## function starts here
    save_sfnr = True
    if os.path.dirname(infile) == '':
        basedir = os.getcwd()
        infile = os.path.join(basedir, infile)
    elif os.path.dirname(infile) == '.':
        basedir = os.getcwd()
        infile = os.path.join(basedir, infile.replace('./', ''))
    else:
        basedir = os.path.dirname(infile)
    if outdir == None:
        outdir = basedir
    qadir = os.path.join(outdir)
    print "%s" % (qadir)
    #very stupid!!
    #if not infile.find('rest2anat_denoised_scrubbed_intep.nii.gz') > 0:
    #    print "error in rest file"
    #    error_and_exit('infile must be of form rest2anat_denoised_scrubbed_intep.nii.gz')
    if not os.path.exists(infile):
        print "error in infile"
        error_and_exit('%s does not exist!' % infile)
    if maskfile == None:
        maskfile = infile.replace('mcf.nii', 'mcf_brain_mask.nii')
    if not os.path.exists(maskfile):
        error_and_exit('%s does not exist!' % maskfile)
    if motfile == None:
        motfile = infile.replace('mcf.nii.gz', 'mcf.par')
    if not os.path.exists(motfile):
        error_and_exit('%s does not exist!' % motfile)
    if not os.path.exists(qadir):
        os.mkdir(qadir)
    else:
        print 'QA dir already exists - overwriting!'
    if verbose:
        print 'infile:', infile
        print 'maskfile:', maskfile
        print 'motfile:', motfile
        print 'outdir:', outdir
        print 'computing image stats'
    img = nib.load(infile)
    imgdata = img.get_data()
    #print np.shape(imgdata)
    #print imgdata.shape
    nslices = imgdata.shape[2]
    ntp = imgdata.shape[3]
    maskimg = nib.load(maskfile)
    maskdata = maskimg.get_data()
    maskvox = N.where(maskdata > 0)
    nonmaskvox = N.where(maskdata == 0)
    if verbose:
        print 'nmaskvox:', len(maskvox[0])

    # load motion parameters and compute FD and identify bad vols for
    # potential scrubbing (ala Power et al.)
    motpars = N.loadtxt(motfile)
    fd = compute_fd(motpars)
    N.savetxt(os.path.join(qadir, 'fd.txt'), fd)
    voxmean = N.mean(imgdata, 3)  # temporal mean for each voxel; 3 = 4th dimension = time
    voxstd = N.std(imgdata, 3)  # temporal standard deviation for each voxel
    voxcv = voxstd / N.abs(voxmean)
    voxcv[N.isnan(voxcv)] = 0
    voxcv[voxcv > 1] = 1

    # compute timepoint statistics
    maskmedian = N.zeros(imgdata.shape[3])
    maskmean = N.zeros(imgdata.shape[3])
    maskmad = N.zeros(imgdata.shape[3])  # median absolute deviation = median(abs(a - median(a))/const)
    maskcv = N.zeros(imgdata.shape[3])
    imgsnr = N.zeros(imgdata.shape[3])
    for t in range(imgdata.shape[3]):
        tmp = imgdata[:, :, :, t]
        tmp_brain = tmp[maskvox]
        tmp_nonbrain = tmp[nonmaskvox]
        maskmad[t] = MAD(tmp_brain)
        maskmedian[t] = N.median(tmp_brain)
        maskmean[t] = N.mean(tmp_brain)
        print maskmean[t]
        maskcv[t] = maskmad[t] / maskmedian[t]
        imgsnr[t] = maskmean[t] / N.std(tmp_nonbrain)  # classical definition of SNR

    # perform Greve et al./fBIRN spike detection
    # 1. Remove mean and temporal trend from each voxel.
    # 2. Compute temporal Z-score for each voxel
    #    (z = (value of voxel(t) - temporal mean of voxel) / standard deviation of voxel).
    # 3. Average the absolute Z-score (AAZ) within each slice and time point separately.
    #    This gives a matrix with number of rows equal to the number of slices (nSlices)
    #    and number of columns equal to the number of time points (nFrames).
    # 4. Compute new Z-scores using a jackknife across the slices (JKZ).
    #    WIKIPEDIA: The jackknife estimator of a parameter is found by systematically leaving
    #    out each observation from a dataset, calculating the estimate, and then finding the
    #    average of these calculations.
    #    For a given time point, remove one of the slices, compute the average and standard
    #    deviation of the AAZ across the remaining slices. Use these two numbers to compute a Z
    #    for the slice left out (this is the JKZ). The final Spike Measure is the absolute value
    #    of the JKZ (AJKZ). Repeat for all slices. This gives a new nSlices-by-nFrames matrix
    #    (see Figure 8). This procedure tends to remove components that are common across slices
    #    and so rejects motion.
    if verbose:
        print 'computing spike stats'
    detrended_zscore = N.zeros(imgdata.shape)
    detrended_data = N.zeros(imgdata.shape)
    for i in range(len(maskvox[0])):
        tmp = imgdata[maskvox[0][i], maskvox[1][i], maskvox[2][i], :]
        tmp_detrended = detrend(tmp)
        detrended_data[maskvox[0][i], maskvox[1][i], maskvox[2][i], :] = tmp_detrended
        detrended_zscore[maskvox[0][i], maskvox[1][i], maskvox[2][i], :] = (
            tmp_detrended - N.mean(tmp_detrended)) / N.std(tmp_detrended)
    loo = sklearn.cross_validation.LeaveOneOut(nslices)  # creates nslices pairs of train/left-out slices
    AAZ = N.zeros((nslices, ntp))
    for s in range(nslices):
        for t in range(ntp):
            AAZ[s, t] = N.mean(N.abs(detrended_zscore[:, :, s, t]))
    JKZ = N.zeros((nslices, ntp))
    if verbose:
        print 'computing outliers'
    for train, test in loo:  # train = other slices, test = left-out slice
        for tp in range(ntp):
            train_mean = N.mean(AAZ[train, tp])  # mean of absolute Z-values of all other slices
            train_std = N.std(AAZ[train, tp])  # std of absolute Z-values of all other slices
            # new value for the left-out slice is its deviation from all other slices
            JKZ[test, tp] = (AAZ[test, tp] - train_mean) / train_std
    AJKZ = N.abs(JKZ)
    spikes = []
    if N.max(AJKZ) > AJKZ_thresh:
        print 'Possible spike: Max AJKZ = %f' % N.max(AJKZ)
        spikes = N.where(N.max(AJKZ, 0) > AJKZ_thresh)[0]
    if len(spikes) > 0:
        N.savetxt(os.path.join(qadir, 'spikes.txt'), spikes)
    voxmean_detrended = N.mean(detrended_data, 3)
    voxstd_detrended = N.std(detrended_data, 3)
    voxsfnr = voxmean / voxstd  # needs to keep the same shape for later plotting
    # temporal tSNR for each voxel; only use voxels where voxstd > 0
    # (previously this was just voxsfnr = voxmean / voxstd, which sometimes creates NaNs)
    voxsfnr_withoutz = voxmean[voxstd > 0] / voxstd[voxstd > 0]
    print "size of voxel standard deviation"
    print np.shape(voxstd)
    print np.size(voxstd[maskvox])
    print np.count_nonzero(voxstd[maskvox])
    print np.size(voxstd[voxstd > 0])
    print "resulting voxsfnr"
    print N.mean(voxsfnr)
    meansfnr = N.mean(voxsfnr_withoutz)  # mean tSNR across all voxels in mask (same as in Julia's scripts)

    # create plots
    #imgdata_flat=imgdata.reshape(N.prod(imgdata.shape))
    #imgdata_nonzero=imgdata_flat[imgdata_flat>0.0]
    scaledmean = (maskmean - N.mean(maskmean)) / N.std(maskmean)
    mean_running_diff = N.zeros(maskmad.shape)
    mean_running_diff = (maskmean[1:] - maskmean[:-1]) / ((maskmean[1:] + maskmean[:-1]) / 2.0)
    DVARS = N.zeros(fd.shape)
    DVARS[1:] = N.sqrt(mean_running_diff ** 2) * 100.0
    N.savetxt(os.path.join(qadir, 'dvars.txt'), DVARS)
    # FD and DVARS simultaneously have to be bigger than 0.5!!
    badvol_index_orig = N.where((fd > FDthresh) * (DVARS > DVARSthresh))[0]
    #print badvol_index_orig
    #print len(badvol_index_orig)
    badvols = N.zeros(len(DVARS))
    badvols[badvol_index_orig] = 1
    badvols_expanded = badvols.copy()
    for i in badvol_index_orig:
        if i > (nback - 1):
            start = i - nback
        else:
            start = 0
        if i < (len(badvols) - nforward):
            end = i + nforward + 1
        else:
            end = len(badvols)
        #print i,start,end
        badvols_expanded[start:end] = 1
    badvols_expanded_index = N.where(badvols_expanded > 0)[0]
    #print badvols_expanded_index
    if len(badvols_expanded_index) > 0:
        N.savetxt(os.path.join(qadir, 'scrubvols.txt'), badvols_expanded_index, fmt='%d')
        # make scrubbing design matrix - one column per scrubbed timepoint
        scrubdes = N.zeros((len(DVARS), len(badvols_expanded_index)))
        for i in range(len(badvols_expanded_index)):
            scrubdes[badvols_expanded_index[i], i] = 1
        N.savetxt(os.path.join(qadir, 'scrubdes.txt'), scrubdes, fmt='%d')
    else:
        scrubdes = []

    # save out complete confound file
    confound_mtx = N.zeros((len(DVARS), 14))
    confound_mtx[:, 0:6] = motpars
    confound_mtx[1:, 6:12] = motpars[:-1, :] - motpars[1:, :]  # derivs
    confound_mtx[:, 12] = fd
    confound_mtx[:, 13] = DVARS
    if not scrubdes == []:
        confound_mtx = N.hstack((confound_mtx, scrubdes))
    N.savetxt(os.path.join(qadir, 'confound.txt'), confound_mtx)

    #plot_timeseries(scaledmean,'Mean in-mask signal (Z-scored)',
    #                os.path.join(qadir,'scaledmaskmean.png'),spikes,'Potential spikes')
    datavars = {'imgsnr': imgsnr, 'meansfnr': meansfnr, 'spikes': spikes,
                'badvols': badvols_expanded_index}
    if plot_data:
        print 'before plot'
        trend = plot_timeseries(maskmean, 'Mean signal (unfiltered)',
                                os.path.join(qadir, 'maskmean.png'),
                                plottrend=True, ylabel='Mean MR signal')
        print 'after plot'
        datavars['trend'] = trend
        plot_timeseries(maskmad, 'Median absolute deviation (robust SD)',
                        os.path.join(qadir, 'mad.png'), ylabel='MAD')
        plot_timeseries(DVARS, 'DVARS (root mean squared signal derivative over brain mask)',
                        os.path.join(qadir, 'DVARS.png'), plotline=0.5, ylabel='DVARS')
        plot_timeseries(fd, 'Framewise displacement', os.path.join(qadir, 'fd.png'),
                        badvols_expanded_index,
                        'Timepoints to scrub (%d total)' % len(badvols),
                        plotline=0.5, ylims=[0, 1], ylabel='FD')
        psd = matplotlib.mlab.psd(maskmean, NFFT=128, noverlap=96, Fs=1 / TR)
        plt.clf()
        fig = plt.figure(figsize=[10, 3])
        fig.subplots_adjust(bottom=0.15)
        plt.plot(psd[1][2:], N.log(psd[0][2:]))
        plt.title('Log power spectrum of mean signal across mask')
        plt.xlabel('frequency (secs)')
        plt.ylabel('log power')
        plt.savefig(os.path.join(qadir, 'meanpsd.png'), bbox_inches='tight')
        plt.close()
        plt.clf()
        plt.imshow(AJKZ, vmin=0, vmax=AJKZ_thresh)
        plt.xlabel('timepoints')
        plt.ylabel('slices')
        plt.title('Spike measure (absolute jackknife Z)')
        plt.savefig(os.path.join(qadir, 'spike.png'), bbox_inches='tight')
        plt.close()
        if img.shape[0] < img.shape[1] and img.shape[0] < img.shape[2]:
            orientation = 'sagittal'
        else:
            orientation = 'axial'
        mk_slice_mosaic(voxmean, os.path.join(qadir, 'voxmean.png'),
                        'Image mean (with mask)', contourdata=maskdata)
        mk_slice_mosaic(voxcv, os.path.join(qadir, 'voxcv.png'), 'Image CV')
        mk_slice_mosaic(voxsfnr, os.path.join(qadir, 'voxsfnr.png'), 'Image SFNR')
        mk_report(infile, qadir, datavars)

    # def save_vars(infile,qadir,datavars):
    datafile = os.path.join(qadir, 'qadata.csv')
    f = open(datafile, 'w')
    f.write('SNR,%f\n' % N.mean(datavars['imgsnr']))
    f.write('SFNR,%f\n' % datavars['meansfnr'])
    #f.write('drift,%f\n'%datavars['trend'].params[1])
    f.write('nspikes,%d\n' % len(datavars['spikes']))
    f.write('nscrub,%d\n' % len(datavars['badvols']))
    f.close()
    if save_sfnr:
        sfnrimg = nib.Nifti1Image(voxsfnr, img.get_affine())
        sfnrimg.to_filename(os.path.join(qadir, 'voxsfnr.nii.gz'))
    return qadir
print('ADF statistic: {}'.format(result[0]))
print('p-value: {}'.format(result[1]))
print('Critical values:')
for key, value in result[4].items():
    print('\t{}: {}'.format(key, value))

# Problem: the p-value is not small.
# The ADF statistic is far from the critical values and the p-value is above
# the 0.05 threshold, so the time series is not stationary.

# Look at the trend:
from statsmodels.tsa.tsatools import detrend
notrend = detrend(df.Last)
df["notrend"] = notrend
df.plot(y=["Last", "notrend"], figsize=(14, 4))

# Taking the logarithm of the dependent variable is a simple way to slow the
# growth of the rolling mean:
df_log = np.log(price)
plt.plot(df_log)

# Redo the tests:
df_log.rolling(12).mean().plot(figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('time', fontsize=20)
rolling_mean = df_log.rolling(window=12).mean()
rolling_std = df_log.rolling(window=12).std()
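# A short follow-up sketch: re-run the ADF test on the detrended and
# log-transformed series built above to see whether the p-value drops.
# Assumes statsmodels' adfuller and the df/df_log objects from this snippet.
from statsmodels.tsa.stattools import adfuller

for name, series in [("notrend", df["notrend"]), ("log", df_log.dropna())]:
    stat, pval = adfuller(series)[:2]
    print('{}: ADF statistic={:.3f}, p-value={:.3f}'.format(name, stat, pval))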
def simple_auto_stationarize(df, verbosity=None, alpha=None, multitest=None,
                             get_conclusions=False, get_actions=False):
    """Auto-stationarize the given time-series dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe composed solely of numeric columns.
    verbosity : int, logging.Logger, optional
        If an int is given, it is interpreted as the logging level to use. See
        https://docs.python.org/3/library/logging.html#levels for details. If a
        logging.Logger object is given, it is used for printing instead, with
        appropriate logging levels. If no value is provided, the default
        logging.Logger behaviour is used.
    alpha : int, optional
        Family-wise error rate (FWER) or false discovery rate (FDR), depending
        on the method used for multiple hypothesis testing error control. If
        no value is provided, a default value of 0.05 (5%) is used.
    multitest : str, optional
        The multiple hypothesis testing error control method to use. If no
        value is provided, the Benjamini–Yekutieli procedure is used. See `the
        documentation of statsmodels' multipletests method for supported values
        <https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html>`.
    get_conclusions : bool, defaults to False
        If set to true, a conclusions dict is returned.
    get_actions : bool, defaults to False
        If set to true, an actions dict is returned.

    Returns
    -------
    results : pandas.DataFrame or dict
        By default, only the transformed dataframe is returned. However, if
        get_conclusions or get_actions are set to True, a dict is returned
        instead, with the following mappings:
        - `postdf` - Maps to the transformed dataframe.
        - `conclusions` - Maps to a dict mapping each column name to the
          arrived conclusion regarding its stationarity.
        - `actions` - Maps to a dict mapping each column name to the
          transformations performed on it to stationarize it.
    """  # noqa: E501
    if verbosity is not None:
        prev_verbosity = set_verbosity_level(verbosity)
    if alpha is None:
        alpha = DEF_ALPHA
    logger = get_logger()
    logger.info("Starting to auto-stationarize a dataframe!")
    logger.info("Starting to check input data validity...")
    logger.info(f"Data shape (time, variables) is {df.shape}.")
    # the first axis - rows - is expected to represent the time dimension,
    # while the second axis - columns - is expected to represent variables;
    # thus, the first is expected to be much longer than the second
    logger.info(
        "Checking current data orientation (rows=time, columns=variables)...")
    if df.shape[1] >= df.shape[0]:
        logger.warning((
            "stationarizer's input dataframe has more columns than rows! "
            "Columns are expected to represent variables, while rows represent"
            " time steps, and thus the input dataframe is expected to have "
            "more rows than columns. Either the input data is inverted, or the"
            " data has far more variables than samples."))
    else:
        logger.info("Data orientation is valid.")
    # assert all columns are numeric
    all_cols_numeric = all([np.issubdtype(x, np.number) for x in df.dtypes])
    if not all_cols_numeric:
        err = ValueError(
            "All columns of stationarizer's input dataframe must be numeric!")
        logger.exception(err)
        raise err

    # util var
    n = len(df.columns)

    # testing for unit root
    logger.info(
        ("Checking for the presence of a unit root in the input time series "
         "using the Augmented Dickey-Fuller test"))
    logger.info(
        ("Reminder:\n "
         "Null Hypothesis: The series has a unit root (value of a=1); meaning,"
         " it is NOT stationary.\n"
         "Alternate Hypothesis: The series has no unit root; it is either "
         "stationary or non-stationary of a different model than unit root."))
    adf_results = []
    for colname in df.columns:
        srs = df[colname]
        result = adfuller(srs, regression='ct')
        logger.info(
            (f"{colname}: test statistic={result[0]}, p-val={result[1]}."))
        adf_results.append(result)

    # testing for trend stationarity
    logger.info((
        "Testing for trend stationarity of input series using the KPSS test."))
    logger.info(("Reminder:\n"
                 "Null Hypothesis (H0): The series is trend-stationary.\n"
                 "Alternative Hypothesis (H1): The series has a unit root."))
    kpss_results = []
    for colname in df.columns:
        srs = df[colname]
        result = kpss(srs, regression='ct')
        logger.info(
            (f"{colname}: test statistic={result[0]}, p-val={result[1]}."))
        kpss_results.append(result)

    # controlling FDR
    logger.info(
        ("Controlling the False Discovery Rate (FDR) using the Benjamini-"
         f"Yekutieli procedure with α={DEF_ALPHA}."))
    adf_pvals = [x[1] for x in adf_results]
    kpss_pvals = [x[1] for x in kpss_results]
    pvals = adf_pvals + kpss_pvals
    by_res = multipletests(
        pvals=pvals,
        alpha=alpha,
        method='fdr_by',
        is_sorted=False,
    )
    reject = by_res[0]
    corrected_pvals = by_res[1]
    adf_rejections = reject[:n]
    kpss_rejections = reject[n:]
    adf_corrected_pvals = corrected_pvals[:n]  # noqa: F841
    kpss_corrected_pvals = corrected_pvals[n:]  # noqa: F841
    conclusion_counts = {}

    def dict_inc(dicti, key):
        try:
            dicti[key] += 1
        except KeyError:
            dicti[key] = 1

    # interpret results
    logger.info("Interpreting test results after FDR control...")
    conclusions = {}
    actions = {}
    for i, colname in enumerate(df.columns):
        conclusion = conclude_adf_and_kpss_results(
            adf_reject=adf_rejections[i], kpss_reject=kpss_rejections[i])
        dict_inc(conclusion_counts, conclusion)
        trans = CONCLUSION_TO_TRANSFORMATIONS[conclusion]
        conclusions[colname] = conclusion
        actions[colname] = trans
        logger.info((f"--{colname}--\n "
                     f"ADF corrected p-val: {adf_corrected_pvals[i]}, "
                     f"H0 rejected: {adf_rejections[i]}.\n"
                     f"KPSS corrected p-val: {kpss_corrected_pvals[i]}, "
                     f"H0 rejected: {kpss_rejections[i]}.\n"
                     f"Conclusion: {conclusion}\n Transformations: {trans}."))

    # making non-stationary series stationary!
    post_cols = {}
    logger.info("Applying transformations...")
    for colname in df.columns:
        srs = df[colname]
        if Transformation.DETREND in actions[colname]:
            logger.info(f"Detrending {colname} (len={len(srs)}).")
            srs = detrend(srs, order=1, axis=0)
        if Transformation.DIFFRENTIATE in actions[colname]:
            logger.info(f"Differentiating {colname} (len={len(srs)}).")
            srs = diff(srs, k_diff=1)
        post_cols[colname] = srs
        logger.info(f"{colname} transformed (len={len(post_cols[colname])}).")

    # equalizing lengths
    min_len = min([len(post_cols[x]) for x in post_cols])
    for colname in df.columns:
        post_cols[colname] = post_cols[colname][:min_len]
    postdf = df.copy()
    postdf = postdf.iloc[:min_len]
    for colname in df.columns:
        postdf[colname] = post_cols[colname]
    logger.info(f"Post transformation shape: {postdf.shape}")
    for k in conclusion_counts:
        count = conclusion_counts[k]
        ratio = 100 * (count / len(df.columns))
        logger.info(f"{count} series ({ratio}%) found with conclusion: {k}.")
    if verbosity is not None:
        set_verbosity_level(prev_verbosity)
    if not get_actions and not get_conclusions:
        return postdf
    results = {'postdf': postdf}
    if get_conclusions:
        results['conclusions'] = conclusions
    if get_actions:
        results['actions'] = actions
    return results
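# A minimal usage sketch for simple_auto_stationarize above, on a small
# synthetic dataframe; the column names and sizes are illustrative.
import numpy as np
import pandas as pd

df = pd.DataFrame({
    'trended': np.arange(300) + np.random.randn(300),   # stationary around a trend
    'random_walk': np.random.randn(300).cumsum(),       # unit root
})
res = simple_auto_stationarize(df, get_conclusions=True, get_actions=True)
print(res['conclusions'])
print(res['actions'])
print(res['postdf'].shape)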
def fmriqa(infile,TR,outdir=None,maskfile=None,motfile=None,verbose=False,plot_data=True):
    save_sfnr=True
    if os.path.dirname(infile)=='':
        basedir=os.getcwd()
        infile=os.path.join(basedir,infile)
    elif os.path.dirname(infile)=='.':
        basedir=os.getcwd()
        infile=os.path.join(basedir,infile.replace('./',''))
    else:
        basedir=os.path.dirname(infile)
    if outdir==None:
        outdir=basedir
    qadir=os.path.join(outdir,'QA')
    if not infile.find('mcf.nii.gz')>0:
        error_and_exit('infile must be of form XXX_mcf.nii.gz')
    if not os.path.exists(infile):
        error_and_exit('%s does not exist!'%infile)
    if maskfile==None:
        maskfile=infile.replace('mcf.nii','mcf_brain_mask.nii')
    if not os.path.exists(maskfile):
        error_and_exit('%s does not exist!'%maskfile)
    if motfile==None:
        motfile=infile.replace('mcf.nii.gz','mcf.par')
    if not os.path.exists(motfile):
        error_and_exit('%s does not exist!'%motfile)
    if not os.path.exists(qadir):
        os.mkdir(qadir)
    else:
        print 'QA dir already exists - overwriting!'
    if verbose:
        print 'infile:',infile
        print 'maskfile:',maskfile
        print 'motfile:',motfile
        print 'outdir:',outdir
        print 'computing image stats'
    img=nib.load(infile)
    imgdata=img.get_data()
    nslices=imgdata.shape[2]
    ntp=imgdata.shape[3]
    maskimg=nib.load(maskfile)
    maskdata=maskimg.get_data()
    maskvox=N.where(maskdata>0)
    nonmaskvox=N.where(maskdata==0)
    if verbose:
        print 'nmaskvox:',len(maskvox[0])
    # load motion parameters and compute FD and identify bad vols for
    # potential scrubbing (ala Power et al.)
    motpars=N.loadtxt(motfile)
    fd=compute_fd(motpars)
    N.savetxt(os.path.join(qadir,'fd.txt'),fd)
    voxmean=N.mean(imgdata,3)
    voxstd=N.std(imgdata,3)
    voxcv=voxstd/N.abs(voxmean)
    voxcv[N.isnan(voxcv)]=0
    voxcv[voxcv>1]=1
    # compute timepoint statistics
    maskmedian=N.zeros(imgdata.shape[3])
    maskmean=N.zeros(imgdata.shape[3])
    maskmad=N.zeros(imgdata.shape[3])
    maskcv=N.zeros(imgdata.shape[3])
    imgsnr=N.zeros(imgdata.shape[3])
    for t in range(imgdata.shape[3]):
        tmp=imgdata[:,:,:,t]
        tmp_brain=tmp[maskvox]
        tmp_nonbrain=tmp[nonmaskvox]
        maskmad[t]=MAD(tmp_brain)
        maskmedian[t]=N.median(tmp_brain)
        maskmean[t]=N.mean(tmp_brain)
        maskcv[t]=maskmad[t]/maskmedian[t]
        imgsnr[t]=maskmean[t]/N.std(tmp_nonbrain)
    # perform Greve et al./fBIRN spike detection
    #1. Remove mean and temporal trend from each voxel.
    #2. Compute temporal Z-score for each voxel.
    #3. Average the absolute Z-score (AAZ) within each slice and time point separately.
    #   This gives a matrix with number of rows equal to the number of slices (nSlices)
    #   and number of columns equal to the number of time points (nFrames).
    #4. Compute new Z-scores using a jackknife across the slices (JKZ). For a given time point,
    #   remove one of the slices, compute the average and standard deviation of the AAZ across
    #   the remaining slices. Use these two numbers to compute a Z for the slice left out
    #   (this is the JKZ). The final Spike Measure is the absolute value of the JKZ (AJKZ).
    #   Repeat for all slices. This gives a new nSlices-by-nFrames matrix (see Figure 8).
    #   This procedure tends to remove components that are common across slices and so rejects motion.
    if verbose:
        print 'computing spike stats'
    detrended_zscore=N.zeros(imgdata.shape)
    detrended_data=N.zeros(imgdata.shape)
    for i in range(len(maskvox[0])):
        tmp=imgdata[maskvox[0][i],maskvox[1][i],maskvox[2][i],:]
        tmp_detrended=detrend(tmp)
        detrended_data[maskvox[0][i],maskvox[1][i],maskvox[2][i],:]=tmp_detrended
        detrended_zscore[maskvox[0][i],maskvox[1][i],maskvox[2][i],:]=(tmp_detrended - N.mean(tmp_detrended))/N.std(tmp_detrended)
    loo=sklearn.cross_validation.LeaveOneOut(nslices)
    AAZ=N.zeros((nslices,ntp))
    for s in range(nslices):
        for t in range(ntp):
            AAZ[s,t]=N.mean(N.abs(detrended_zscore[:,:,s,t]))
    JKZ=N.zeros((nslices,ntp))
    if verbose:
        print 'computing outliers'
    for train,test in loo:
        for tp in range(ntp):
            train_mean=N.mean(AAZ[train,tp])
            train_std=N.std(AAZ[train,tp])
            JKZ[test,tp]=(AAZ[test,tp] - train_mean)/train_std
    AJKZ=N.abs(JKZ)
    spikes=[]
    if N.max(AJKZ)>AJKZ_thresh:
        print 'Possible spike: Max AJKZ = %f'%N.max(AJKZ)
        spikes=N.where(N.max(AJKZ,0)>AJKZ_thresh)[0]
    if len(spikes)>0:
        N.savetxt(os.path.join(qadir,'spikes.txt'),spikes)
    voxmean_detrended=N.mean(detrended_data,3)
    voxstd_detrended=N.std(detrended_data,3)
    voxsfnr=voxmean/voxstd
    meansfnr=N.mean(voxsfnr[maskvox])
    # create plots
    #imgdata_flat=imgdata.reshape(N.prod(imgdata.shape))
    #imgdata_nonzero=imgdata_flat[imgdata_flat>0.0]
    scaledmean=(maskmean - N.mean(maskmean))/N.std(maskmean)
    mean_running_diff=N.zeros(maskmad.shape)
    mean_running_diff=(maskmean[1:]-maskmean[:-1])/((maskmean[1:]+maskmean[:-1])/2.0)
    DVARS=N.zeros(fd.shape)
    DVARS[1:]=N.sqrt(mean_running_diff**2)*100.0
    N.savetxt(os.path.join(qadir,'dvars.txt'),DVARS)
    badvol_index_orig=N.where((fd>FDthresh)*(DVARS>DVARSthresh))[0]
    #print badvol_index_orig
    badvols=N.zeros(len(DVARS))
    badvols[badvol_index_orig]=1
    badvols_expanded=badvols.copy()
    for i in badvol_index_orig:
        if i>(nback-1):
            start=i-nback
        else:
            start=0
        if i<(len(badvols)-nforward):
            end=i+nforward+1
        else:
            end=len(badvols)
        #print i,start,end
        badvols_expanded[start:end]=1
    badvols_expanded_index=N.where(badvols_expanded>0)[0]
    #print badvols_expanded_index
    if len(badvols_expanded_index)>0:
        N.savetxt(os.path.join(qadir,'scrubvols.txt'),badvols_expanded_index,fmt='%d')
        # make scrubbing design matrix - one column per scrubbed timepoint
        scrubdes=N.zeros((len(DVARS),len(badvols_expanded_index)))
        for i in range(len(badvols_expanded_index)):
            scrubdes[badvols_expanded_index[i],i]=1
        N.savetxt(os.path.join(qadir,'scrubdes.txt'),scrubdes,fmt='%d')
    else:
        scrubdes=[]
    # save out complete confound file
    confound_mtx=N.zeros((len(DVARS),14))
    confound_mtx[:,0:6]=motpars
    confound_mtx[1:,6:12]=motpars[:-1,:]-motpars[1:,:] # derivs
    confound_mtx[:,12]=fd
    confound_mtx[:,13]=DVARS
    if not scrubdes==[]:
        confound_mtx=N.hstack((confound_mtx,scrubdes))
    N.savetxt(os.path.join(qadir,'confound.txt'),confound_mtx)
    #plot_timeseries(scaledmean,'Mean in-mask signal (Z-scored)',
    #                os.path.join(qadir,'scaledmaskmean.png'),spikes,'Potential spikes')
    datavars={'imgsnr':imgsnr,'meansfnr':meansfnr,'spikes':spikes,'badvols':badvols_expanded_index}
    if plot_data:
        print 'before plot'
        trend=plot_timeseries(maskmean,'Mean signal (unfiltered)',os.path.join(qadir,'maskmean.png'),
                              plottrend=True,ylabel='Mean MR signal')
        print 'after plot'
        datavars['trend']=trend
        plot_timeseries(maskmad,'Median absolute deviation (robust SD)',
                        os.path.join(qadir,'mad.png'),ylabel='MAD')
        plot_timeseries(DVARS,'DVARS (root mean squared signal derivative over brain mask)',
                        os.path.join(qadir,'DVARS.png'),plotline=0.5,ylabel='DVARS')
        plot_timeseries(fd,'Framewise displacement',os.path.join(qadir,'fd.png'),
                        badvols_expanded_index,'Timepoints to scrub (%d total)'%len(badvols),
                        plotline=0.5,ylims=[0,1],ylabel='FD')
        psd=matplotlib.mlab.psd(maskmean,NFFT=128,noverlap=96,Fs=1/TR)
        plt.clf()
        fig=plt.figure(figsize=[10,3])
        fig.subplots_adjust(bottom=0.15)
        plt.plot(psd[1][2:],N.log(psd[0][2:]))
        plt.title('Log power spectrum of mean signal across mask')
        plt.xlabel('frequency (secs)')
        plt.ylabel('log power')
        plt.savefig(os.path.join(qadir,'meanpsd.png'),bbox_inches='tight')
        plt.close()
        plt.clf()
        plt.imshow(AJKZ,vmin=0,vmax=AJKZ_thresh)
        plt.xlabel('timepoints')
        plt.ylabel('slices')
        plt.title('Spike measure (absolute jackknife Z)')
        plt.savefig(os.path.join(qadir,'spike.png'),bbox_inches='tight')
        plt.close()
        if img.shape[0]<img.shape[1] and img.shape[0]<img.shape[2]:
            orientation='sagittal'
        else:
            orientation='axial'
        mk_slice_mosaic(voxmean,os.path.join(qadir,'voxmean.png'),'Image mean (with mask)',contourdata=maskdata)
        mk_slice_mosaic(voxcv,os.path.join(qadir,'voxcv.png'),'Image CV')
        mk_slice_mosaic(voxsfnr,os.path.join(qadir,'voxsfnr.png'),'Image SFNR')
        mk_report(infile,qadir,datavars)
    # def save_vars(infile,qadir,datavars):
    datafile=os.path.join(qadir,'qadata.csv')
    f=open(datafile,'w')
    f.write('SNR,%f\n'%N.mean(datavars['imgsnr']))
    f.write('SFNR,%f\n'%datavars['meansfnr'])
    #f.write('drift,%f\n'%datavars['trend'].params[1])
    f.write('nspikes,%d\n'%len(datavars['spikes']))
    f.write('nscrub,%d\n'%len(datavars['badvols']))
    f.close()
    if save_sfnr:
        sfnrimg=nib.Nifti1Image(voxsfnr,img.get_affine())
        sfnrimg.to_filename(os.path.join(qadir,'voxsfnr.nii.gz'))
    return qadir
def main(mov_stack=None, dttname="M", components='ZZ', filterid=1,
         pairs=[], show=False, outfile=None):
    db = connect()
    if get_config(db, name="autocorr", isbool=True):
        condition = "sta2 >= sta1"
    else:
        condition = "sta2 > sta1"
    start, end, datelist = build_movstack_datelist(db)

    if mov_stack != 0:
        mov_stacks = [mov_stack, ]
    else:
        mov_stack = get_config(db, "mov_stack")
        if mov_stack.count(',') == 0:
            mov_stacks = [int(mov_stack), ]
        else:
            mov_stacks = [int(mi) for mi in mov_stack.split(',')]

    gs = gridspec.GridSpec(len(mov_stacks), 1)
    plt.figure(figsize=(15, 10))
    plt.subplots_adjust(bottom=0.06, hspace=0.3)
    first_plot = True
    for i, mov_stack in enumerate(mov_stacks):
        current = start
        alldf = []
        while current <= end:
            day = os.path.join('DTT', "%02i" % filterid,
                               "%03i_DAYS" % mov_stack, components,
                               '%s.txt' % current)
            if os.path.isfile(day):
                df = pd.read_csv(day, header=0, index_col=0,
                                 parse_dates=True)
                alldf.append(df)
            current += datetime.timedelta(days=1)
        if not alldf:
            # no DTT files found for this moving window
            continue
        alldf = pd.concat(alldf)

        errname = "E" + dttname
        alldf[dttname] *= -100
        alldf[errname] *= -100
        ALL = alldf[alldf['Pairs'] == 'ALL'].copy()
        allbut = alldf[alldf['Pairs'] != 'ALL'].copy()

        groups = {}
        groups['CRATER'] = ["UV11", "UV15", "FJS", "FLR", "SNE", "UV12",
                            "FOR", "RVL", "UV06"]
        groups['GPENTES'] = ["UV03", "UV08", "UV04", "UV02", "HDL"]
        groups['VOLCAN'] = groups['CRATER'] + groups['GPENTES'] + \
            ['HIM', 'VIL']

        plt.subplot(gs[i])
        x = {}
        # for group in groups.keys():
        #     pairindex = []
        #     for j, pair in enumerate(allbut['Pairs']):
        #         net1, sta1, net2, sta2 = pair.split('_')
        #         if sta1 in groups[group] and sta2 in groups[group]:
        #             pairindex.append(j)
        #     tmp = allbut.iloc[np.array(pairindex)]
        #     tmp = tmp.resample('D').mean()
        #     plt.plot(tmp.index, tmp[dttname], label=group)
        #     x[group] = tmp
        # tmp = x["CRATER"] - x["VOLCAN"]
        # plt.plot(tmp.index, tmp[dttname], label="Crater - Volcan")

        for pair in pairs:
            print(pair)
            pair1 = alldf[alldf['Pairs'] == pair].copy()
            print(pair1.head())
            plt.plot(pair1.index, pair1[dttname], label=pair)
            plt.fill_between(pair1.index,
                             pair1[dttname] - pair1[errname],
                             pair1[dttname] + pair1[errname],
                             zorder=-1, alpha=0.5)
            pair1.to_csv('%s-m%i-f%i.csv' % (pair, mov_stack, filterid))

        tmp2 = allbut[dttname].resample('D').mean()
        tmp2.plot(label='mean')
        tmp3 = allbut[dttname].resample('D').median()
        tmp3.plot(label='median')

        py1_wmean, py1_wstd = get_wavgwstd(allbut, dttname, errname)
        #py1_wmean = py1_wmean.resample('D').median()
        #py1_wstd = py1_wstd.resample('D').mean().fillna(0.0)
        data = detrend(py1_wmean)
        #plt.plot(ALL.index, ALL[dttname], c='r',
        #         label=r'ALL: $\delta v/v$ of the mean network')
        plt.plot(py1_wmean.index, data, c='g', lw=1, zorder=11,
                 label=r'Weighted mean of $\delta v/v$ of individual pairs')
        plt.fill_between(py1_wmean.index, data + py1_wstd, data - py1_wstd,
                         color='g', lw=1, zorder=-1, alpha=0.3)
        plt.ylabel(r'$\delta v/v$ in %')
        if first_plot:
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=4,
                       ncol=2, borderaxespad=0.)
            left, right = plt.xlim()
            if mov_stack == 1:
                plt.title('1 Day')
            else:
                plt.title('%i Days Moving Window' % mov_stack)
            first_plot = False
        else:
            plt.xlim(left, right)
            plt.title('%i Days Moving Window' % mov_stack)
        plt.grid(True)
        del alldf

    if outfile:
        if outfile.startswith("?"):
            if len(mov_stacks) == 1:
                outfile = outfile.replace(
                    '?', '%s-f%i-m%i-M%s' % (components, filterid,
                                             mov_stack, dttname))
            else:
                outfile = outfile.replace(
                    '?', '%s-f%i-M%s' % (components, filterid, dttname))
        outfile = "dvv " + outfile
        print("output to:", outfile)
        plt.savefig(outfile)
    if show:
        plt.show()
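# The weighted-mean dv/v series is detrended before plotting. A toy
# illustration with synthetic data, assuming a linear detrend such as
# scipy.signal.detrend (get_wavgwstd and the real DTT column names are
# MSNoise conventions and are not reproduced here):
def _dvv_detrend_sketch():
    import numpy as np
    import pandas as pd
    from scipy.signal import detrend as linear_detrend

    dates = pd.date_range('2015-01-01', periods=100, freq='D')
    rng = np.random.default_rng(1)
    # synthetic weighted-mean dv/v with a linear drift
    wmean = pd.Series(0.01 * np.arange(100) + rng.normal(0, 0.05, 100),
                      index=dates)
    # removing the drift isolates the transient dv/v variations
    return pd.Series(linear_detrend(wmean.values), index=dates)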
def rolling_corr_plot(index, region_pairs, window, center=True, order=None,
                      legend_order=None, sup_title=None,
                      detrend_series=False, diff=False, annual=False,
                      seasonal=False, shift=1, fill_alpha=0.3):
    """
    Calculate the rolling correlation of detrended CO2 intensity between
    pairs of regions. Multiple detrend methods are possible, but only the
    "seasonal" method is used in the final figures.

    inputs:
        index (df): dataframe with monthly CO2 intensity of each region
        region_pairs (list): list of tuples, where each tuple is a pair of
            regions to compare
        window (int): length of the rolling window
        center (bool): if the rolling correlation window should be centered
        order (list): order of NERC region facet windows
        legend_order (list): order of NERC regions in legend
        sup_title (str): suptitle to place above the facet grid
        detrend_series (bool): if the CO2 intensity data should be detrended
        diff (bool): use a differencing method to detrend
        annual (bool): use a linear regression detrend separately on each year
        seasonal (bool): detrend with a 12-month rolling mean
        shift (int): value of shift for the diff detrend method (1 = 1 month)
        fill_alpha (float): alpha value for the 'fill_between' of the regplot
            uncertainty band
    """
    df = index.copy()
    df.reset_index(inplace=True)
    nercs = df['nerc'].unique()
    df.set_index(['nerc', 'datetime'], inplace=True)
    df.sort_index(inplace=True)

    df_list = []
    if detrend_series:
        for nerc in nercs:
            if diff:
                df.loc[idx[nerc, :], 'index (g/kwh)'] = \
                    diff_detrend(df.loc[idx[nerc, :], 'index (g/kwh)'],
                                 shift)
            if annual:
                df.loc[idx[nerc, :], 'index (g/kwh)'] = \
                    annual_detrend(df.loc[idx[nerc, :]])
            if seasonal:
                trend = (df.loc[nerc, 'index (g/kwh)']
                         .rolling(12, center=True)
                         .mean())
                detr = df.loc[nerc, 'index (g/kwh)'] - trend
                detr = pd.DataFrame(detr)
                detr['nerc'] = nerc
                df_list.append(detr)
            else:
                df.loc[idx[nerc, :], 'index (g/kwh)'] = \
                    detrend(df.loc[idx[nerc, :], 'index (g/kwh)'])

    if seasonal:
        # Need to concat the list of per-region dataframes
        df = pd.concat(df_list)
        df.reset_index(inplace=True)
        df.set_index(['nerc', 'datetime'], inplace=True)
    df.dropna(inplace=True)

    corr_df = pd.concat(
        [(df.loc[regions[0]]['index (g/kwh)']
          .rolling(window, center=center)
          .corr(df.loc[regions[1]]['index (g/kwh)']))
         for regions in region_pairs],
        axis=1)

    # Create columns with the names of each region. Legacy code, but still
    # functional.
    cols = ['{} | {}'.format(regions[0], regions[1])
            for regions in region_pairs]
    corr_df.columns = cols

    # Go from wide format to tidy
    corr_tidy = pd.melt(corr_df.reset_index(), id_vars=['datetime'],
                        value_name='Correlation')
    corr_tidy['region1'] = corr_tidy['variable'].str.split(' | ').str[0]
    corr_tidy['region2'] = corr_tidy['variable'].str.split(' | ').str[-1]

    # Add the 0-indexed 'count' column
    add_count(corr_tidy)

    if not order:
        order = ['WECC', 'TRE', 'SPP', 'SERC', 'RFC', 'MRO']
    if not legend_order:
        legend_order = ['SPP', 'TRE', 'SERC', 'MRO', 'FRCC', 'NPCC', 'WECC']
    legend_len = len(legend_order)

    g = sns.FacetGrid(corr_tidy.dropna(), col='region1', col_wrap=2,
                      aspect=1.2, hue='region2', palette='tab10', size=2,
                      hue_order=legend_order)
    # Use regplot to get the regression line, but disable its scatter
    g.map(sns.regplot, 'count', 'Correlation', scatter=False,
          truncate=True, line_kws={'lw': 2})
    # regplot only does a scatter - add plt.plot for the lines
    g.map(plt.plot, 'count', 'Correlation')

    # Create custom patch lines for the legend - the default dots were small
    plot_colors = sns.color_palette('tab10', legend_len)
    legend_patches = [mlines.Line2D([], [], color=c) for c in plot_colors]
    legend_data = dict(zip(legend_order, legend_patches))
    g.add_legend(legend_data=legend_data, title='Second Region')

    axes = g.axes.flatten()

    # Grid lines at the start of each even year from 2004-16
    years = range(2004, 2017, 2)
    distance = 24  # months in 2 years
    # tick locations
    x_ticks = [(x * distance) + 6 for x in range(1, 8)]

    for ax, title in zip(axes, order):
        ax.set_title(title)
        ax.set_xticks(x_ticks)
        ax.set_xlim(12, None)
        # Find PolyCollection objects (confidence intervals from regplot)
        # and change the alpha to make them darker
        for collection in ax.collections:
            if isinstance(collection, matplotlib.collections.PolyCollection):
                collection.set_alpha(fill_alpha)

    # Year for the ticklabels
    g.set_xticklabels(years, rotation=35)
    g.set_xlabels('Year')

    # Suptitle if desired
    if sup_title:
        plt.subplots_adjust(top=0.9)
        g.fig.suptitle(sup_title)
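# The core of the "seasonal" path in rolling_corr_plot, in isolation:
# detrend each region's monthly CO2 intensity with a 12-month centered
# rolling mean, then correlate two regions over a rolling window. The
# region names and values below are synthetic, for illustration only.
def _seasonal_rolling_corr_sketch():
    import numpy as np
    import pandas as pd

    dates = pd.date_range('2004-01-01', '2016-12-01', freq='MS')
    rng = np.random.default_rng(42)
    n = len(dates)
    intensity = pd.DataFrame({
        'WECC': 600 - 0.5 * np.arange(n) + rng.normal(0, 10, n),
        'TRE': 650 - 0.4 * np.arange(n) + rng.normal(0, 10, n),
    }, index=dates)

    # subtract the 12-month rolling mean (the "seasonal" detrend above)
    detrended = intensity - intensity.rolling(12, center=True).mean()
    # 24-month centered rolling correlation between the two regions
    return (detrended['WECC']
            .rolling(24, center=True)
            .corr(detrended['TRE']))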
if 'alldf' in locals():
    global dttname, errname
    dttname = "M0"
    errname = "EM0"
    alldf[dttname] *= -100
    alldf[errname] *= -100
    ALL = alldf[alldf['Pairs'] == 'ALL'].copy()
    allbut = alldf[alldf['Pairs'] != 'ALL'].copy()

    py1_wmean, py1_wstd = get_wavgwstd(allbut)
    py1_wmean = py1_wmean.resample('D').mean()
    py1_wstd = py1_wstd.resample('D').mean().fillna(0.0)
    data = detrend(py1_wmean)

    plt.subplot(gs[i])
    plt.plot(ALL.index, ALL[dttname], c='r',
             label=r'ALL: $\delta v/v$ of the mean network')
    #plt.fill_between(ALL.index, ALL[dttname] - ALL[errname],
    #                 ALL[dttname] + ALL[errname], lw=1, color='red',
    #                 zorder=-1, alpha=0.3)
    plt.plot(py1_wmean.index, data, c='g', lw=1, zorder=11,
             label=r'Weighted mean of $\delta v/v$ of individual pairs')
    plt.fill_between(py1_wmean.index, data + py1_wstd, data - py1_wstd,
                     color='g', lw=1, zorder=-1, alpha=0.3)
    plt.ylabel(r'$\delta v/v$ in %')
    plt.ylim(0.5, -0.5)
    if first_plot:
        plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=4,
                   ncol=1, borderaxespad=0.)
        left, right = plt.xlim()
        if mov_stack == 1:
from typing import Optional


def verify_series(series: Series) -> Optional[Series]:
    """If given a Pandas Series, return a detrended copy of it; otherwise None."""
    if isinstance(series, Series):
        return detrend(series)
    return None
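# Example of the contract sketched in verify_series (toy values):
#
#     s = Series([1.0, 2.0, 4.0, 8.0], name='x')
#     verify_series(s)         # -> detrended Series
#     verify_series([1, 2])    # -> None (not a Series)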