def map_img_chars(b, map_array):
    """Map the values of a 2-D array onto characters from map_array.

    The value range of ``b`` is divided into ``len(map_array)`` equal-width
    bins and every element is replaced by the character of the bin it falls
    into.  Returns a string array with the same 2-D shape as ``b``.
    """
    rows, cols = b.shape[:2]
    # Equal-width bin edges across the data range.  endpoint=False leaves the
    # maximum value in the top bin, so after the -1 shift below the indices
    # run exactly 0 .. len(map_array)-1.
    edges = linspace(b.min(), b.max(), len(map_array), endpoint=False)
    indices = digitize(b.flatten(), edges) - 1
    indices.shape = rows, cols
    # Fancy-index the character table with the per-pixel bin indices.
    return array(map_array)[indices]
def get_binned_data_2d(self, n_bins = 10):
    """Bin this dataset's (pi, imp) points into ``n_bins`` equally populated
    bins (percentile edges) and return the result as a BinnedData2D.

    Returns a BinnedData2D whose ``data`` attribute is a DataFrame with one
    row per bin and columns ['pi', 'imp', 'stdd', 'nn'] (mean pi, mean imp,
    std of imp, and point count).
    """
    # generating an instance of the class BinnedData2D
    bd_in = BinnedData2D()
    # setting the attribute source of the output equal to the source of the dataset
    bd_in.source = self.source
    # setting the filter of the output equal to the filter of the dataset
    bd_in.filter = self.filter
    # setting the number of bins of the output equal to the number of bins
    # that the method receives as an input
    bd_in.n_bins = n_bins
    # Extracting pi and imp
    database_reduced = self.db_fil.loc[:,['pi','imp']]
    # Generating the bin extremes: percentiles 0%, 100/n%, ..., 100% of pi,
    # so each bin holds (approximately) the same number of points
    bin_end_imp_pi = pl.percentile(database_reduced.pi,list(100.*pl.arange(bd_in.n_bins+1.)/(bd_in.n_bins)))
    # Adjusting the last bin extreme slightly upward so the maximum point
    # falls strictly inside the last bin rather than on its edge
    bin_end_imp_pi[-1] = bin_end_imp_pi[-1] + 0.00001
    # Assigning each point to a bin (digitize returns 1-based bin labels)
    database_reduced['fac_pi'] = pl.digitize(database_reduced.pi,bin_end_imp_pi)
    # Using a groupby in order to generate average pi and imp for each bin,
    # plus the std and count of imp, assigning the output to df_imp
    df_gp = database_reduced[['pi','imp','fac_pi']].groupby('fac_pi')
    df_imp = pd.concat([df_gp.mean(),df_gp.imp.std(),df_gp.imp.count()], axis=1)
    df_imp.columns = ['pi','imp','stdd','nn']
    # Setting the data of the output equal to the result of the binning procedure
    bd_in.data = df_imp
    # returning the filled instance of the class BinnedData2D
    return bd_in
# ICD (as a percentage) and log specific SFR for each galaxy in the sample.
# NOTE(review): `galaxies`, `f1s1`, `arrow_down` and `arrow_right` are
# defined earlier in the file, outside this chunk.
icd = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])
sfr = pyl.asarray([pyl.log10(galaxy.ssfr) for galaxy in galaxies])

# plot the data
f1s1.scatter(icd, sfr, c='0.8', edgecolor='0.8', s=25, label='Data')

# plot the outliers: points that fall off the axes are drawn as custom
# arrow markers clamped to the axis limits (-10 in y, 50 in x)
for i, s in zip(icd, sfr):
    if s < -10:
        pyl.scatter(i, -10, s=100, marker=None, verts=arrow_down)
    if i > 50:
        pyl.scatter(50, s, s=100, marker=None, verts=arrow_right)

# running median of log sSFR in 10 equal-width ICD bins
bins = pyl.linspace(icd.min(), 50, 10)
delta = bins[1] - bins[0]
idx = pyl.digitize(icd, bins)
# NOTE(review): digitize returns labels 1..len(bins); range(10) therefore
# includes k=0 (empty, median of empty slice) and skips k=10 — confirm this
# matches the intended binning.
running_median = [pyl.median(sfr[idx == k]) for k in range(10)]
#upper = [scoreatpercentile(sfr[idx==k], 75) for k in range(1,7)]
#lower = [scoreatpercentile(sfr[idx==k], 25) for k in range(1,7)]
pyl.plot(bins - delta / 2, running_median, '#A60628', lw=4, label='Median')
#pyl.plot(bins-delta/2, upper, '#348ABD', '--', lw=4, label='Quartile')
#pyl.plot(bins-delta/2, lower, '#348ABD', '--', lw=4)

# add the speagle relation
from astLib.astCalc import tz
t = tz(2.25)
m = 10
# NOTE(review): this rebinds `sfr` from the data array to a scalar from the
# relation — verify later code does not expect the array.
sfr = (0.84 - 0.026 * t) * m - (6.51 - 0.11 * t)
# Split the sample by classification code stored in truefalse_array:
# 0 -> true positive, 1 -> true negative, 2 -> false positive,
# 3 -> false negative (mapping shown by the variable names below).
inds_truepos = pylab.find(truefalse_array == 0)
inds_trueneg = pylab.find(truefalse_array == 1)
inds_falsepos = pylab.find(truefalse_array == 2)
inds_falseneg = pylab.find(truefalse_array == 3)

zspec = pylab.array(zspec)
zphot = pylab.array(zphot)
lmass = pylab.array(lmass)
zlss = pylab.array(zlss)

# redshift offsets from the LSS redshift, normalized by (1+z)
dzspec = (zspec - zlss) / (1 + zspec)
dzphot = (zphot - zlss) / (1 + zphot)

### binning by mass
digi_truepos = pylab.digitize(lmass[inds_truepos], lmassbins)
digi_trueneg = pylab.digitize(lmass[inds_trueneg], lmassbins)
digi_falsepos = pylab.digitize(lmass[inds_falsepos], lmassbins)
digi_falseneg = pylab.digitize(lmass[inds_falseneg], lmassbins)

# Count galaxies per mass bin; [1:-1] drops the below-range and above-range
# overflow bins that digitize labels 0 and len(lmassbins).
bincount_truepos = pylab.bincount(digi_truepos, minlength=len(lmassbins)+1)[1:-1]
bincount_trueneg = pylab.bincount(digi_trueneg, minlength=len(lmassbins)+1)[1:-1]
bincount_falsepos = pylab.bincount(digi_falsepos, minlength=len(lmassbins)+1)[1:-1]
bincount_falseneg = pylab.bincount(digi_falseneg, minlength=len(lmassbins)+1)[1:-1]

# Accumulate the per-bin counts for this field/iteration (i and the n_*
# accumulators are defined outside this chunk).
n_truepos[i] += bincount_truepos
n_trueneg[i] += bincount_trueneg
n_falsepos[i] += bincount_falsepos
n_falseneg[i] += bincount_falseneg
print 'done with +/- 1.5 sigma_zphot'
### LSS members selected by zspec members_zspec = pylab.find((f.cat.use[f.inds_spatial] == 1) & (dr_pkpc_min[f.inds_spatial] <= 1500.) & (f.cat.z_spec[f.inds_spatial] > zlo_spec) & (f.cat.z_spec[f.inds_spatial] < zhi_spec)) ### LSS candidates selected by zphot members_zphot = pylab.find((f.cat.use[f.inds_spatial] == 1) & (f.cat.z_spec[f.inds_spatial] < 0) & (dr_pkpc_min[f.inds_spatial] <= 1500.) & (f.fout.z[f.inds_spatial] > zlo_phot) & (f.fout.z[f.inds_spatial] < zhi_phot)) ### binning galaxies by stellar mass digi_mass_zspec = pylab.digitize(f.fout.lmass[f.inds_spatial][members_zspec], lmassbins) digi_mass_zphot = pylab.digitize(f.fout.lmass[f.inds_spatial][members_zphot], lmassbins) ngal_bins_zspec = pylab.bincount(digi_mass_zspec, minlength=len(lmassbins)+1)[1:-1] ngal_bins_zphot = pylab.bincount(digi_mass_zphot, minlength=len(lmassbins)+1)[1:-1] ngal_bins_zphot_corr = ngal_bins_zphot * corr_factor nfinal[i] += ngal_bins_zspec + ngal_bins_zphot_corr ################### ### STAR-FORMING ###################
# Keep only galaxies with I-band signal-to-noise above 30.
galaxies = filter(lambda galaxy: galaxy.ston_I > 30.0, galaxies)

f1 = pyl.figure(1, figsize=(6, 4))
f1s1 = f1.add_subplot(111)

# ICD with and without the core, both as percentages; y is the fractional
# change caused by removing the core.
icd = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])
icdc = pyl.asarray([galaxy.ICD_IH_cored * 100 for galaxy in galaxies])
y = (icd - icdc) / icd

f1s1.scatter(icd, y, c="0.8", edgecolor="0.8", s=25, label="Data")
f1s1.axhline(0.0, c="k", lw=1)

# running median of the fractional change in 10 equal-width ICD bins
bins = pyl.linspace(icd.min(), icd.max(), 10)
delta = bins[1] - bins[0]
idx = pyl.digitize(icd, bins)
# NOTE(review): digitize labels run 1..10 here, so range(10) evaluates the
# empty k=0 bin and skips k=10 — confirm this off-by-one is intended.
running_median = [pyl.median(y[idx == k]) for k in range(10)]
pyl.plot(bins - delta / 2, running_median, c="#A60628", lw=4, label="Median")

f1s1.set_xlim(-5, 50)
f1s1.set_ylim(-1, 1)
f1s1.set_xlabel(r"$\xi[i_{775},H_{160}]$ (%)")
# f1s1.set_ylabel(r'$(\xi[i_{775},H_{160}]_{out} -\xi[i_{775},H_{160}])\xi[i_{775},H_{160}]$')
f1s1.set_ylabel("Fractional Change")

pyl.legend(loc="upper right")
pyl.tight_layout()
pyl.show()
# Load the galaxy sample and keep objects with an IR SFR and S/N(I) > 30.
galaxies = pickle.load(open("galaxies.pickle", "rb"))
galaxies = filter(lambda galaxy: galaxy.sfrir != None and galaxy.ston_I > 30.0, galaxies)

# Three vertically stacked panels; this chunk fills f1 (full sample) and
# begins f2 (mass-limited sample).
f = pyl.figure(1)
f1 = f.add_subplot(311)
f2 = f.add_subplot(312)
f3 = f.add_subplot(313)

# Panel 1: SFR_total / SFR_2800 vs ICD(%) for the full sample.
for galaxy in galaxies:
    f1.scatter(galaxy.ICD_IH * 100, galaxy.sfrtotal / galaxy.sfr2800,
               c="0.8", edgecolor="0.8", s=50)

# now add the medians
x = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])
y = pyl.asarray([galaxy.sfrtotal / galaxy.sfr2800 for galaxy in galaxies])
bins = pyl.linspace(0, 55, 11)
idx = pyl.digitize(x, bins)
delta = bins[1] - bins[0]
# NOTE(review): digitize labels run 0..11 for 11 edges; range(11) includes
# the below-range k=0 bin and skips k=11 — confirm intended.
running = [pyl.median(y[idx == k]) for k in range(11)]
f1.plot(bins - delta / 2, running, "r--", lw=4)

# Panel 2: restrict to 10 < log(Mass) < 11 and repeat.
galaxies = filter(lambda galaxy: 10 < galaxy.Mass < 11, galaxies)
for galaxy in galaxies:
    f2.scatter(galaxy.ICD_IH * 100, galaxy.sfrtotal / galaxy.sfr2800,
               c="0.8", edgecolor="0.8", s=50)

# now add the medians (continues past this chunk)
x = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])
y = pyl.asarray([galaxy.sfrtotal / galaxy.sfr2800 for galaxy in galaxies])
bins = pyl.linspace(0, 55, 11)
idx = pyl.digitize(x, bins)
delta = bins[1] - bins[0]
# NOTE(review): this chunk begins mid-call — the line below is the tail of a
# 2-D histogram call whose opening is outside this view.
                                     range=([ax2[0], ax2[1]], [ax2[2], ax2[3]]))

# Display log(counts+1) of the 2-D histogram as an image on sp2.
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
asdf = sp2.imshow(pylab.log10(hist2d.T + 1), extent=extent,
                  interpolation='nearest', cmap=pylab.cm.Greens)
asdf.set_clim(0, pylab.log10(hist2d.max()) * 1.)
sp2.set_aspect('auto')

# Mass and redshift bin edges and their midpoints.
lmassbins = pylab.arange(8.75, 11.8, 0.25)
lmassbars = (lmassbins[1:] + lmassbins[:-1]) / 2
zbins = pylab.arange(0.5, 1.4, 0.1)
zbars = (zbins[1:] + zbins[:-1]) / 2

digi_lmass = pylab.digitize(lmass_afta_1d, lmassbins)
digi_z = pylab.digitize(z_1d, zbins)

dm_medians = []
dm_nmads = []
z_medians = []
z_nmads = []

# Per mass bin: median and NMAD of the before-minus-after mass difference.
# digitize labels 1..len(bins)-1 correspond to the interior bins.
for dmi in range(1, len(lmassbins)):
    inds = pylab.find(digi_lmass == dmi)
    dm_medians.append(pylab.median(lmass_b4_1d[inds] - lmass_afta_1d[inds]))
    dm_nmads.append(mypy.nmad(lmass_b4_1d[inds] - lmass_afta_1d[inds]))

# Same statistics per redshift bin (chunk is cut off after the median).
for zi in range(1, len(zbins)):
    inds = pylab.find(digi_z == zi)
    z_medians.append(pylab.median(lmass_b4_1d[inds] - lmass_afta_1d[inds]))
# NOTE(review): this chunk begins mid-expression — the lines below are the
# tail of a rename(...) on an aggregation whose opening is outside this view.
            columns={
                'quantity': func + 'Quantity',
                'price': func + 'Price',
                'priceAll': func + 'PriceAll'
            }).fillna(0))
numFeatures += [func + 'Quantity', func + 'Price', func + 'PriceAll']

# Concatenate all resource descriptions per project id into one text field.
newR = newR.join(
    R.groupby('id').agg({
        'description': lambda x: ' '.join(x.values.astype(str))
    }).rename(columns={'description': 'resource_description'}))
T = T.join(newR, on='id')

# if you visit the donors website, it has categorized the price by these bins:
T['price_category'] = pl.digitize(T.priceAll, [0, 50, 100, 250, 500, 1000, pl.inf])
numFeatures.append('price_category')

# the difference of max and min of price and quantity per item can also be relevant
for c in ['Quantity', 'Price', 'PriceAll']:
    T['max%s_min%s' % (c, c)] = T['max%s' % c] - T['min%s' % c]
    numFeatures.append('max%s_min%s' % (c, c))

# Free the intermediate frames before the next processing stage.
del Ttr, Tts, R, newR
gc.collect()

# Encode teacher ids as integers; flag prefixes outside the common three.
le = LabelEncoder()
T['teacher_id'] = le.fit_transform(T['teacher_id'])
T['teacher_gender_unknown'] = T.teacher_prefix.apply(
    lambda x: int(x not in ['Ms.', 'Mrs.', 'Mr.']))
# NOTE(review): list continues past this chunk.
numFeatures += [
    'teacher_number_of_previously_posted_projects', 'teacher_id',
### calculating MFs dm = 0.25 lmassbins = pylab.arange(9.5 - dm / 2., 11.5 + dm, dm) lmassbars = (lmassbins[1:] + lmassbins[:-1]) / 2. xlo, xhi = 25. * 100. / dx_map, 100 - 25. * 100. / dx_map ylo, yhi = 25. * 100. / dy_map, 100 - 25. * 100. / dy_map inds0 = pylab.find((overdens_arr > 0.0) & (overdens_arr < 0.5) & (simdata.x_cMpc > xlo) & (simdata.x_cMpc < xhi) & (simdata.y_cMpc > ylo) & (simdata.y_cMpc < yhi)) inds1 = pylab.find((overdens_arr > 0.5) & (overdens_arr < 1.0) & (simdata.x_cMpc > xlo) & (simdata.x_cMpc < xhi) & (simdata.y_cMpc > ylo) & (simdata.y_cMpc < yhi)) inds2 = pylab.find((overdens_arr > 1.0) & (overdens_arr < 1.5) & (simdata.x_cMpc > xlo) & (simdata.x_cMpc < xhi) & (simdata.y_cMpc > ylo) & (simdata.y_cMpc < yhi)) inds3 = pylab.find((overdens_arr > 1.5) & (overdens_arr < 2.0) & (simdata.x_cMpc > xlo) & (simdata.x_cMpc < xhi) & (simdata.y_cMpc > ylo) & (simdata.y_cMpc < yhi)) digi0 = pylab.digitize(pylab.log10(simdata.stellarMass[inds0]), lmassbins) digi1 = pylab.digitize(pylab.log10(simdata.stellarMass[inds1]), lmassbins) digi2 = pylab.digitize(pylab.log10(simdata.stellarMass[inds2]), lmassbins) digi3 = pylab.digitize(pylab.log10(simdata.stellarMass[inds3]), lmassbins) ngal0 = pylab.bincount(digi0, minlength=len(lmassbins) + 1)[1:-1] ngal1 = pylab.bincount(digi1, minlength=len(lmassbins) + 1)[1:-1] ngal2 = pylab.bincount(digi2, minlength=len(lmassbins) + 1)[1:-1] ngal3 = pylab.bincount(digi3, minlength=len(lmassbins) + 1)[1:-1]
# Usable galaxies in the redshift window (zlo/zhi defined outside this chunk).
subinds = pylab.find((f.cat.use[f.inds_spatial] == 1) &
                     (f.fout.z[f.inds_spatial] > zlo) &
                     (f.fout.z[f.inds_spatial] < zhi))
# Same selection restricted to massive galaxies (log M* > 11).
subinds_massive = pylab.find((f.cat.use[f.inds_spatial] == 1) &
                             (f.fout.z[f.inds_spatial] > zlo) &
                             (f.fout.z[f.inds_spatial] < zhi) &
                             (f.fout.lmass[f.inds_spatial] > 11))

# For the low-z window, collect mugshot PDF filenames of massive galaxies
# into the per-field string s[fi] (s and fi are defined outside this chunk).
if 0.19 < zlo < 0.76:
    for si in subinds_massive:
        s[fi] += ' mugshot_%05i_%s.pdf' % (
            f.cat.id[f.inds_spatial][si], f.version)

# Count galaxies per stellar-mass bin; [1:-1] drops digitize's
# below/above-range overflow bins.
digi_mass = pylab.digitize(f.fout.lmass[f.inds_spatial][subinds], massbins)
ngal_bins = pylab.bincount(digi_mass, minlength=len(massbins) + 1)[1:-1]

# Poisson confidence interval on each bin count.
nlo_poisson, nhi_poisson = [], []
for n in ngal_bins:
    nhi, nlo = mypy.massfunc.confidence_interval(n)
    nlo_poisson.append(nlo)
    nhi_poisson.append(nhi)
nlo_poisson, nhi_poisson = pylab.array(nlo_poisson), pylab.array(
    nhi_poisson)

# Number density per volume per mass-bin width, with lower error bars;
# bins with zero galaxies produce nan (0/0), zeroed out on the last line.
phi_bins = ngal_bins * 1. / volume / dm
ephi_lo = phi_bins * nlo_poisson / ngal_bins
ephi_lo[pylab.isnan(ephi_lo)] = 0
########################### ## Measuring temporary impact as a function of the daily rate Q/V_D ## ## Generating evenly populated bins of \pi by means of percentile ## Assigning to each metaorder the corresponding bin in df_in.fac_pi ## Evaluating the average daily rate and impact for each bin, standard deviation and counting by means of a groupby ## Fitting a power-law and a logarithmic function ## Plotting ########################### print('Measuring temporary impact as a function of the daily rate Q/V_D ...') n_bins_imp_pi = 30 bin_end_imp_pi = pl.percentile(df_in.pi,list(100.*pl.arange(n_bins_imp_pi+1.)/(n_bins_imp_pi))) bin_end_imp_pi[-1] = bin_end_imp_pi[-1] + 0.00001 # fixing the last extreme of the bins df_in['fac_pi'] = pl.digitize(df_in.pi,bin_end_imp_pi) df_gp = df_in[['pi','imp','fac_pi']].groupby('fac_pi') df_imp_1d = pd.concat([df_gp.mean(),df_gp.imp.std(),df_gp.imp.count()], axis=1) df_imp_1d.columns = ['pi','imp','stdd','nn'] ## Fitting temporary impact as a function of the daily rate Q/V_D print('Fitting temporary impact as a function of the daily rate Q/V_D...') # fitting a power-law function ar_pl = [0., 0.3] # extremes of the grid of the starting points for the non-linear optimisation algorithm br_pl = [0., 1.] # extremes of the grid of the starting points for the non-linear optimisation algorithm def ff_pl(x, a, b): return a * pow(x,b) par_pl,vv_pl,chi_pl = fit_nonlin_1d_2p(ff_pl,df_imp_1d,ar_pl,br_pl) # fitting a logarithmic function ar_lg = [0., 0.1] # extremes of the grid of the starting points for the non-linear optimisation algorithm br_lg = [50., 500.] # extremes of the grid of the starting points for the non-linear optimisation algorithm
from astLib import astStats

# Load the sample and keep galaxies with S/N(I) > 30 and a clump count.
galaxies = pickle.load(open('galaxies.pickle', 'rb'))
galaxies = filter(lambda galaxy: galaxy.ston_I > 30. and
                  galaxy.clumps is not None, galaxies)

f = pyl.figure(1, figsize=(6,4))
f1s1 = f.add_subplot(111)

# Scatter of clump count (x) vs ICD in percent (y).
d = [[galaxy.clumps, galaxy.ICD_IH*100] for galaxy in galaxies]
d = pyl.asarray(d)
f1s1.scatter(d[:,0], d[:,1], s=50, c='0.8', edgecolor='0.8')

# Mean clump count in equal-width ICD bins.
bins = pyl.arange(0, 50, 5)
# digitize labels run 1..len(bins); the -1 shift makes them 0-based so
# range(len(bins)) below covers every bin.
index = pyl.digitize(d[:,1], bins) - 1
# BUG FIX: was bins[1] - bins[2], which is a *negative* bin width (-5) and
# shifted the median curve the wrong way; the bin width is bins[1] - bins[0].
delta = bins[1] - bins[0]
avgs = [pyl.mean(d[:,0][index==k]) for k in range(len(bins))]
#avgs = [astStats.biweightLocation(d[:,0][index==k], 6.0) for k in range(len(bins))]
#avgs = astStats.runningStatistic(d[:,1], d[:,0])
#bins = pyl.linspace(d[:,1].min(), d[:,1].max(), 10)
#delta = bins[1] - bins[0]
#f1s1.hlines(bins - delta/2., [0], avgs, lw=2, color='#A60628')
# Plot the per-bin means at the bin centers (note x/y are swapped:
# mean clump count on x, ICD bin center on y).
f1s1.plot(avgs, bins - delta/2., lw=2, color='#A60628')

# Robust (biweight) location of ICD for each integer clump count 0..8.
avg=[]
for i in range(9):
    d = [galaxy.ICD_IH*100 for galaxy in galaxies if galaxy.clumps ==i]
    avg.append(astStats.biweightLocation(d, 6.0))
# Three vertically stacked panels; this chunk fills f1 (full sample) and
# begins f2 (mass-limited sample).  `f`, `galaxies` and `pyl` are defined
# earlier in the file, outside this chunk.
f1 = f.add_subplot(311)
f2 = f.add_subplot(312)
f3 = f.add_subplot(313)

# Panel 1: SFR_total / SFR_2800 vs ICD(%) for the full sample.
for galaxy in galaxies:
    f1.scatter(galaxy.ICD_IH * 100, galaxy.sfrtotal / galaxy.sfr2800,
               c='0.8', edgecolor='0.8', s=50)

# now add the medians
x = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])
y = pyl.asarray([galaxy.sfrtotal / galaxy.sfr2800 for galaxy in galaxies])
bins = pyl.linspace(0, 55, 11)
idx = pyl.digitize(x, bins)
delta = bins[1] - bins[0]
# NOTE(review): digitize labels run 0..11 for 11 edges; range(11) includes
# the below-range k=0 bin and skips k=11 — confirm intended.
running = [pyl.median(y[idx == k]) for k in range(11)]
f1.plot(bins - delta / 2, running, 'r--', lw=4)

# Panel 2: restrict to 10 < log(Mass) < 11 and repeat.
galaxies = filter(lambda galaxy: 10 < galaxy.Mass < 11, galaxies)
for galaxy in galaxies:
    f2.scatter(galaxy.ICD_IH * 100, galaxy.sfrtotal / galaxy.sfr2800,
               c='0.8', edgecolor='0.8', s=50)

# now add the medians (continues past this chunk)
x = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])