def compute_results(Nbins=16, Nbootstraps=10, method='landy-szalay', rseed=0):
    """Compute bootstrapped angular correlation results for both samples.

    Reads the module-level ``data_red`` and ``data_blue`` objects (each must
    support ``D['ra']`` and ``D['dec']``) and passes them to
    ``bootstrap_two_point_angular`` using the same log-spaced bin edges.

    Parameters
    ----------
    Nbins : int
        Number of log-spaced bin *edges* generated between 1/60 and 6
        (so ``Nbins - 1`` bins). Previously this argument was ignored and
        16 was always used; the default keeps that behaviour.
    Nbootstraps : int
        Forwarded to ``bootstrap_two_point_angular``.
    method : str
        Forwarded to ``bootstrap_two_point_angular``.
    rseed : int
        Seed given to ``np.random.seed`` before any computation.

    Returns
    -------
    list
        ``[bins, ...]`` where the bin edges are followed by the items
        returned by ``bootstrap_two_point_angular`` for ``data_red`` and
        then for ``data_blue``.
    """
    # Seed the global NumPy RNG so repeated calls give the same bootstraps.
    np.random.seed(rseed)

    # Log-spaced edges; honour the caller's Nbins instead of a literal 16.
    bins = 10 ** np.linspace(np.log10(1. / 60.), np.log10(6), Nbins)

    results = [bins]
    for D in [data_red, data_blue]:
        # ``+=`` extends the list with each element of the returned sequence.
        results += bootstrap_two_point_angular(D['ra'], D['dec'],
                                               bins=bins,
                                               method=method,
                                               Nbootstraps=Nbootstraps)
    return results
def compute_results(Nbins=16, Nbootstraps=10, method='landy-szalay', rseed=0):
    """Run the bootstrapped two-point angular correlation on both samples.

    The module-level ``data_red`` and ``data_blue`` objects are indexed with
    ``'ra'`` and ``'dec'`` and handed to ``bootstrap_two_point_angular`` with
    a shared set of log-spaced bin edges.

    Parameters
    ----------
    Nbins : int
        How many log-spaced bin *edges* to generate between 1/60 and 6
        (``Nbins - 1`` bins). The value used to be hard-coded to 16
        regardless of this argument; the default preserves that result.
    Nbootstraps : int
        Passed through to ``bootstrap_two_point_angular``.
    method : str
        Passed through to ``bootstrap_two_point_angular``.
    rseed : int
        Seed for ``np.random.seed``, applied before anything else.

    Returns
    -------
    list
        The bin edges, followed by the items returned for ``data_red`` and
        then those returned for ``data_blue``.
    """
    # Fix the global RNG state so the bootstrap resampling is reproducible.
    np.random.seed(rseed)

    # Use the requested number of edges rather than a fixed literal.
    log_lo = np.log10(1. / 60.)
    log_hi = np.log10(6)
    bins = 10 ** np.linspace(log_lo, log_hi, Nbins)

    results = [bins]
    for D in [data_red, data_blue]:
        # In-place ``+=`` appends every element of the returned sequence.
        results += bootstrap_two_point_angular(D['ra'], D['dec'],
                                               bins=bins,
                                               method=method,
                                               Nbootstraps=Nbootstraps)
    return results
def angular_twoptcorr(x1, x2, xrange=(0.1, 1.), nbins=20, Nbootstraps=10,
                      method='landy-szalay', rseed=0):
    """Angular correlation using bootstrapping.

    Parameters
    ----------
    x1, x2 : array-like
        The two coordinate arrays, passed positionally (in this order) as the
        first two arguments of ``bootstrap_two_point_angular``.
    xrange : sequence of two positive numbers
        Lower and upper limit of the separation range. Only ``xrange[0]`` and
        ``xrange[1]`` are read. The default is now an immutable tuple instead
        of a shared list.
    nbins : int
        Number of log-spaced bin *edges* generated by ``np.logspace``; the
        returned arrays of centres therefore have ``nbins - 1`` entries.
    Nbootstraps : int
        Forwarded to ``bootstrap_two_point_angular``.
    method : str
        Forwarded to ``bootstrap_two_point_angular``.
    rseed : int
        Seed given to ``np.random.seed`` before any computation.

    Returns
    -------
    tuple
        ``(bin_centers, corr, corr_err)`` where ``bin_centers`` are the
        arithmetic midpoints of consecutive bin edges.

    Raises
    ------
    ValueError
        From ``math.log10`` if either limit in ``xrange`` is not positive.
    """
    # Seed the global NumPy RNG so the bootstrap draws are reproducible.
    np.random.seed(rseed)

    rlogmin = math.log10(xrange[0])
    rlogmax = math.log10(xrange[1])
    bins = np.logspace(rlogmin, rlogmax, nbins)

    # The third returned item (the individual bootstrap samples) is not used.
    corr, corr_err, _ = bootstrap_two_point_angular(
        x1, x2, bins=bins, method=method, Nbootstraps=Nbootstraps)

    bin_centers = 0.5 * (bins[1:] + bins[:-1])
    return (bin_centers, corr, corr_err)
def crimeKdeCorr(df,inPeak=-1,step=100,bandwidth=BANDWIDTH,rad=RADIUS,flagCptCorr=0):
    """KDE contours of local crimes plus an optional two-point correlation test.

    Parameters
    ----------
    df : DataFrame-like
        Must provide ``df['Longitude']`` and ``df['Latitude']`` columns with a
        ``.values`` attribute; should contain only the local crimes.
    inPeak : number
        If negative (default) the contour levels are scaled by the KDE maximum
        on the grid; otherwise by ``inPeak / len(df)``.
    step : number
        Grid spacing, converted with ``meter2deg`` (so presumably in meters).
    bandwidth :
        Currently unused: ``gaussian_kde`` is called with its default
        bandwidth. Kept for interface compatibility.
    rad : number
        Upper edge of the correlation bins is ``rad * meter2deg``.
    flagCptCorr : int
        When > 0, also compute the two-point angular correlation and fit a
        weighted line to it.

    Returns
    -------
    tuple
        ``(flagRandom, seglonLst, seglatLst, segcolorLst, locPeak,
        valPeak * nCrime)``. ``flagRandom`` is 1 only when the correlation was
        computed and the fitted slope is positive, otherwise 0. The three
        ``seg*`` lists hold one entry per contour segment (longitudes,
        latitudes, colour). ``locPeak`` holds the (lon, lat) grid points where
        the KDE equals its maximum.
    """
    nCrime = len(df)
    lonVals = df['Longitude'].values
    latVals = df['Latitude'].values
    latRange = [min(latVals), max(latVals)]
    lonRange = [min(lonVals), max(lonVals)]
    latCent = (latRange[1] - latRange[0]) / 2 + latRange[0]

    #-- generate the grid points for the given lon,lat(deg) range
    # np.linspace requires an integer ``num``; np.floor returns a float, which
    # current NumPy rejects, so convert explicitly.
    nLatStep = int(np.floor((latRange[1] - latRange[0]) / (step * meter2deg))) + 1
    nLonStep = int(np.floor((lonRange[1] - lonRange[0])
                            / (step * meter2deg / np.cos(latCent * deg2rad)))) + 1
    latLst = np.linspace(latRange[0], latRange[1], num=nLatStep)
    lonLst = np.linspace(lonRange[0], lonRange[1], num=nLonStep)
    lonGrid, latGrid = np.meshgrid(lonLst, latLst)
    positions = np.vstack([lonGrid.ravel(), latGrid.ravel()])
    values = np.vstack([lonVals, latVals])

    # KDE with scipy's default bandwidth (the ``bandwidth`` argument is not applied).
    kernel = sp.stats.gaussian_kde(values)
    Z = np.reshape(kernel(positions).T, lonGrid.shape)

    # Grid cells where the density equals its maximum.
    locLst = (Z == Z.max())
    locPeak = np.vstack([lonGrid[locLst], latGrid[locLst]]).T
    # Single-argument print emits the same text on Python 2 and Python 3.
    print("### test, peak locations, %s" % (locPeak,))

    #--- get contours on a wider mesh
    Nwiden = 20  # must be an even number
    widen = step * Nwiden * meter2deg
    latLstwiden = np.linspace(latRange[0] - widen * 0.5, latRange[1] + widen * 0.5,
                              num=nLatStep + Nwiden)
    lonLstwiden = np.linspace(lonRange[0] - widen * 0.5, lonRange[1] + widen * 0.5,
                              num=nLonStep + Nwiden)
    lonGridwiden, latGridwiden = np.meshgrid(lonLstwiden, latLstwiden)
    positionswiden = np.vstack([lonGridwiden.ravel(), latGridwiden.ravel()])
    Zwiden = np.reshape(kernel(positionswiden).T, lonGridwiden.shape)

    c = cntr.Cntr(lonGridwiden, latGridwiden, Zwiden)
    seglonLst = []
    seglatLst = []
    segcolorLst = []
    ratioLst = [0.2, 0.4, 0.6, 0.85]
    colorLst = ["#66b2ff", "#004ce6", "#ff6666", "#cc0000"]

    valPeak = Z.max()
    if inPeak < 0:
        # no input peak value to use
        peak = valPeak
    else:
        # float() avoids integer truncation under Python 2 when inPeak is an int.
        peak = float(inPeak) / nCrime

    for ratio, color in zip(ratioLst, colorLst):
        res = c.trace(peak * ratio)
        # The first half of the trace result holds the segments.
        nseg = len(res) // 2
        segments = res[:nseg]
        print("number of segments %f=%d" % (ratio, nseg))
        seglon, seglat = seg2list(segments)
        for i in range(nseg):
            seglonLst.append(seglon[i])
            seglatLst.append(seglat[i])
            segcolorLst.append(color)

    # NOTE: the original code had a loop over range(0) here that would have
    # appended the centre of each top-level segment to locPeak; it never
    # executed, so it has been removed.
    print("## test , after segmetns, ceters, in lon,lat %s" % (locPeak,))

    flagRandom = 0
    if flagCptCorr > 0:
        #---- two-point angular correlation; independent of the KDE above
        bins = np.linspace(0.0, rad * meter2deg, num=10)
        binCenter = 0.5 * (bins[1:] + bins[:-1])
        corResult = bootstrap_two_point_angular(df['Longitude'], df['Latitude'],
                                                bins=bins, method='landy-szalay',
                                                Nbootstraps=5)

        #-- weighted linear fit: x = bin centres, y = correlation,
        #   weights = inverse variance from the bootstrap error
        X = sm.add_constant(binCenter)
        linModel = sm.WLS(corResult[0], X, weights=1. / corResult[1] ** 2)
        linFit = linModel.fit()

        if FLAGPLOT == 1:
            #--- same analysis on uniformly random points in the same bounding box
            lonRandm = lonRange[0] + np.random.rand(nCrime) * (lonRange[1] - lonRange[0])
            latRandm = latRange[0] + np.random.rand(nCrime) * (latRange[1] - latRange[0])
            corResultRandm = bootstrap_two_point_angular(lonRandm, latRandm,
                                                         bins=bins,
                                                         method='landy-szalay',
                                                         Nbootstraps=5)
            linModelRandm = sm.WLS(corResultRandm[0], X,
                                   weights=1. / corResultRandm[1] ** 2)
            linFitRandm = linModelRandm.fit()

            #-- plot correlation and fit for local crimes (blue) and random points (red)
            plt.figure(2)
            plt.errorbar(binCenter * deg2meter, corResult[0], yerr=corResult[1], fmt='bo')
            plt.plot(binCenter * deg2meter,
                     binCenter * linFit.params[1] + linFit.params[0], 'b-')
            plt.errorbar(binCenter * deg2meter, corResultRandm[0],
                         yerr=corResultRandm[1], fmt='ro')
            plt.plot(binCenter * deg2meter,
                     binCenter * linFitRandm.params[1] + linFitRandm.params[0], 'r-')
            plt.xlabel('distance (m)', fontsize=20)
            plt.ylabel('correlation', fontsize=20)
            plt.savefig("pic/two_point_correlation.png")
            plt.close(2)

        #-- check the slope; only the central value decides the flag,
        #   slope + standard error is reported for information
        slopeMid = linFit.params[1]
        slopeMax = linFit.params[1] + linFit.bse[1]
        if slopeMid > 0:
            print("####Warning, positive or flat slope! %f+%f=%f"
                  % (linFit.params[1], linFit.bse[1], slopeMax))
            flagRandom = 1  # random crimes
        else:
            print("### negative slope, has structure? %f+%f=%f"
                  % (linFit.params[1], linFit.bse[1], slopeMax))

    return flagRandom, seglonLst, seglatLst, segcolorLst, locPeak, valPeak * nCrime
print "{:.2f} {:.2f}".format(bins[j], corr[j]) print 'Uniform distribution? ', np.allclose(corr, 0, atol=0.02) #bins=np.linspace(0.005,1,11) #corr=two_point_angular(cat_ngfs_non['RA'], cat_ngfs_non['DEC'], bins, method='landy-szalay') #print '\nNon nucleated dwarf galaxies' #print 'Angle(deg) w(theta)' #for j in np.arange(len(bins)-1): # print "{:.2f} {:.2f}".format(bins[j], corr[j]) #print 'Uniform distribution? ', np.allclose(corr, 0, atol=0.02) # Now the non-nucleated dwarfs result = bootstrap_two_point_angular(cat_ngfs_non['RA'], cat_ngfs_non['DEC'], bins, method='landy-szalay', Nbootstraps=5000) bin_centers = 0.5 * (bins[1:] + bins[:-1]) (corr, corr_err, bootstraps) = result print 'Non-nucleated dwarfs - Uniform distribution?', np.allclose(corr, 0, atol=0.02) fig = plt.figure(figsize=(8, 6)) plt.xlabel(r'$\theta\ (deg)$') plt.ylabel(r'$\hat{w}(\theta)$') plt.errorbar(bin_centers, corr, corr_err, fmt='.k', ecolor='gray', lw=1) plt.savefig( '../catalogs/NGFS_FCC_non_nucleated_dwarfs_two-point_correlation.pdf')
plt.plot(corr_halo[i]) ax.text(0.95, 0.95, labels[i], ha='right', va='top', transform=ax.transAxes) ax.set_xlabel(r'$\theta\ (deg)$') if i == 0: ax.set_ylabel(r'$\hat{w}(\theta)$') plt.title('SDSS Halo Stars Correlation') plt.show() #Repeat correlation calculations and plots for Gaia Halo bins = np.linspace(0.1, 10, 11) # edges for the 10 bins to evaluate corr_R_gaia_halo, R_gaia_err, R_boot = bootstrap_two_point_angular( R_gaia_halo_l, R_gaia_halo_b, bins, method='landy-szalay', Nbootstraps=10) corr_B_gaia_halo, B_gaia_err, B_boot = bootstrap_two_point_angular( B_gaia_halo_l, B_gaia_halo_b, bins, method='landy-szalay', Nbootstraps=10) bin_centers = 0.5 * (bins[1:] + bins[:-1]) plt.plot(bin_centers, corr_R_gaia_halo, 'r') plt.plot(bin_centers, corr_B_gaia_halo, 'b') plt.errorbar(bin_centers, corr_R_gaia_halo, R_gaia_err, fmt='.k', ecolor='gray', lw=1) plt.errorbar(bin_centers, corr_B_gaia_halo, B_gaia_err,