def read_array(ftype, directory, tag, dataset, numThreads=1, noH=False,
               physicalUnits=False, CGS=False, verbose=True):
    """Read ``dataset`` from every file of a snapshot and concatenate the parts.

    Args:
        ftype (str): forwarded to ``get_files`` and ``read_header``.
        directory (str): forwarded to ``get_files`` and ``read_header``.
        tag (str): forwarded to ``get_files`` and ``read_header``.
        dataset (str): dataset name handed to ``read_hdf5`` for each file.
        numThreads (int): ``1`` uses a ``SerialPool``, ``-1`` a ``MultiPool``
            with its default process count, any other value a ``MultiPool``
            with that many processes.
        noH (bool): if True, run ``apply_hfreeUnits_conversion`` on the data.
        physicalUnits (bool): if True, run
            ``apply_physicalUnits_conversion`` on the data.
        CGS (bool): if True, run ``apply_CGSUnits_conversion`` on the data.
        verbose (bool): forwarded to the conversion helpers.

    Returns:
        The per-file results concatenated along axis 0, after any requested
        conversions (applied in the order noH, physicalUnits, CGS).
    """
    start = timeit.default_timer()

    files = get_files(ftype, directory, tag)

    if numThreads == 1:
        pool = schwimmbad.SerialPool()
    elif numThreads == -1:
        pool = schwimmbad.MultiPool()
    else:
        pool = schwimmbad.MultiPool(processes=numThreads)

    # Close the pool even when a worker raises, so processes are not leaked.
    try:
        lg = partial(read_hdf5, dataset=dataset)
        dat = np.concatenate(list(pool.map(lg, files)), axis=0)
    finally:
        pool.close()

    stop = timeit.default_timer()

    print("Reading in '{}' for z = {} using {} thread(s) took {}s".format(
        dataset,
        np.round(read_header(ftype, directory, tag, dataset='Redshift'), 3),
        numThreads,
        np.round(stop - start, 6)))

    # The conversion helpers take the first file as their reference file.
    if noH:
        dat = apply_hfreeUnits_conversion(files[0], dataset, dat,
                                          verbose=verbose)

    if physicalUnits:
        dat = apply_physicalUnits_conversion(files[0], dataset, dat,
                                             verbose=verbose)

    if CGS:
        dat = apply_CGSUnits_conversion(files[0], dataset, dat,
                                        verbose=verbose)

    return dat
def get_age(arr, z, numThreads=4):
    """Map ``get_SFT`` over ``arr`` at redshift ``z`` and return an array.

    Args:
        arr: iterable of values passed one at a time to ``get_SFT``.
        z: forwarded to ``get_SFT`` as the ``redshift`` keyword.
        numThreads (int): ``1`` uses a ``SerialPool``, ``-1`` a ``MultiPool``
            with its default process count, otherwise a ``MultiPool`` with
            that many processes.

    Returns:
        ``np.ndarray`` holding one ``get_SFT`` result per element of ``arr``.
    """
    if numThreads == 1:
        pool = schwimmbad.SerialPool()
    elif numThreads == -1:
        pool = schwimmbad.MultiPool()
    else:
        pool = schwimmbad.MultiPool(processes=numThreads)

    # The pool was previously never closed, leaking its worker processes.
    try:
        calc = partial(get_SFT, redshift=z)
        Age = np.array(list(pool.map(calc, arr)))
    finally:
        pool.close()

    return Age
def get_age(self, arr, z, numThreads=4):
    """Return ``self.cosmo.age(z).value`` minus the formation time of each item.

    Args:
        arr: iterable of values passed one at a time to
            ``self.get_star_formation_time``.
        z: redshift handed to ``self.cosmo.age``.
        numThreads (int): ``1`` uses a ``SerialPool``, ``-1`` a ``MultiPool``
            with its default process count, otherwise a ``MultiPool`` with
            that many processes.

    Returns:
        Array of differences, one entry per element of ``arr``.
    """
    if numThreads == 1:
        pool = schwimmbad.SerialPool()
    elif numThreads == -1:
        pool = schwimmbad.MultiPool()
    else:
        pool = schwimmbad.MultiPool(processes=numThreads)

    # try/finally so the pool is released even if the mapping raises.
    try:
        Age = self.cosmo.age(z).value - np.array(
            list(pool.map(self.get_star_formation_time, arr)))
    finally:
        pool.close()

    return Age
def test_freqmap_multi(cache_file):
    """Run a ``FreqMap`` over 16 integer inputs on a ``MultiPool``.

    Each result is reported through ``exp.callback``; ``exp.status()`` is
    called once the pool block has finished.
    """
    with FreqMap(cache_file) as exp:
        inputs = list(range(16))
        with schwimmbad.MultiPool() as workers:
            workers.map(exp, inputs, callback=exp.callback)
        exp.status()
def proceed2(self, subd, dates):
    """Extract and convert every tile of ``subd`` for each date in ``dates``.

    One worker per date (plus one) is started. Destination directories and
    empty binary files are prepared for each date before the parallel step,
    and ``self.cleantar`` is called for each date afterwards.

    Args:
        subd: sub-domain identifier; tiles are those keys of
            ``self.tg.subdmap`` whose value equals ``subd``.
        dates (list): dates to process; must contain fewer than 127 entries.
    """
    assert isinstance(dates, list)
    nworkers = len(dates)
    assert nworkers < 127

    tiles = [t for t, s in self.tg.subdmap.items() if s == subd]

    for date in dates:
        self.create_destdir(date, subd)
        self.allocate_empty_binfile(date, subd)

    tasks = [(date, subd, t) for t in tiles for date in dates]

    pool = schwimmbad.MultiPool(processes=nworkers + 1)
    # Release the worker processes even if a task raises.
    try:
        tic = time.time()
        pool.map(self.task_extract_convert, tasks)
        toc = time.time()
        elapsed = toc - tic
        print(
            f"time to convert {len(tasks)} tiles with {nworkers} workers: {elapsed:.4} s"
        )
    finally:
        pool.close()

    for date in dates:
        self.cleantar(date, subd)
def get_flux_all(kappa, tag, BC_fac, IMF='Chabrier_300', inp='FLARES',
                 filters=flare.filters.NIRCam, Type='Total', log10t_BC=7.,
                 extinction='default', orientation="sim", numThreads=8):
    """Compute fluxes with ``get_flux`` for all simulations, or for a single one.

    For ``inp == 'FLARES'`` one ``get_flux`` call is made per row of
    ``../weight_files/weights_grid.txt`` (simulation indices ``0..N-1``),
    distributed over a ``MultiPool`` with ``numThreads`` processes. For any
    other ``inp`` a single call with simulation index 0 is made.

    All remaining keyword arguments are forwarded unchanged to ``get_flux``.

    Returns:
        ``np.ndarray`` of per-simulation results for FLARES, otherwise
        whatever ``get_flux`` returns.
    """
    print(f"Getting fluxes for tag {tag} with kappa={kappa}")

    if inp == 'FLARES':
        df = pd.read_csv('../weight_files/weights_grid.txt')
        weights = np.array(df['weights'])

        sims = np.arange(0, len(weights))

        calc = partial(get_flux, kappa=kappa, tag=tag, BC_fac=BC_fac, IMF=IMF,
                       inp=inp, filters=filters, Type=Type,
                       log10t_BC=log10t_BC, extinction=extinction,
                       orientation=orientation)

        pool = schwimmbad.MultiPool(processes=numThreads)
        # Make sure the workers are released even if a simulation fails.
        try:
            out = np.array(list(pool.map(calc, sims)))
        finally:
            pool.close()

    else:
        out = get_flux(0, kappa=kappa, tag=tag, BC_fac=BC_fac, IMF=IMF,
                       inp=inp, filters=filters, Type=Type,
                       log10t_BC=log10t_BC, extinction=extinction,
                       orientation=orientation)

    return out
def get_data(tiles, date, hour):
    """Run ``proceed_tile`` on every tile for one date/hour in parallel.

    Uses the module-level ``nthreads`` for the pool size; when the
    module-level ``ismaster`` flag is set, a progress line is printed first.

    Args:
        tiles: iterable of tiles; must support ``len``.
        date: date forwarded to ``proceed_tile``.
        hour: hour forwarded to ``proceed_tile`` (printed zero-padded to 2).

    Returns:
        List with one ``proceed_tile`` result per tile, in tile order.
    """
    if ismaster:
        print(f"read HIST {date}:{hour:02} #tiles: {len(tiles)} in {subds}",
              flush=True)

    tasks = [(tile, hour, date) for tile in tiles]

    pool = schwimmbad.MultiPool(processes=nthreads)
    # Close the pool even if a tile fails, so workers are not leaked.
    try:
        data = pool.map(proceed_tile, tasks)
    finally:
        pool.close()

    return data
def do_frame(self, domain_args, data_args):
    """Queue one frame task and flush the queue once ``nthreads`` are pending.

    A task is ``(self.hmaps[self.index], domain_args, data_args)``. When the
    number of queued tasks reaches ``self.nthreads`` they are all handed to
    ``proceed_frame`` on a ``MultiPool`` and the queue is reset.
    """
    task = (self.hmaps[self.index], domain_args, data_args)
    self.videotasks += [task]
    self.index += 1

    if self.index == self.nthreads:
        pool = schwimmbad.MultiPool(processes=self.nthreads + 1)
        # Release the workers even if rendering a frame raises.
        try:
            pool.map(proceed_frame, self.videotasks)
        finally:
            pool.close()
        self.index = 0
        self.videotasks = []
def get_allcorners():
    """Compute ``get_corner`` for every tile in ``alltiles`` in parallel.

    Returns:
        dict mapping each tile to its ``get_corner`` result.
    """
    import schwimmbad

    pool = schwimmbad.MultiPool()
    # Release the workers even if a tile fails.
    try:
        res = pool.map(get_corner, alltiles)
    finally:
        pool.close()

    # ``pool.map`` returns results in input order, so pair them positionally.
    # The previous ``res[tile]`` lookup was only right when the tiles were
    # exactly 0..N-1 in order.
    return dict(zip(alltiles, res))
def setup_pool(self):
    """Create ``self.pool``.

    With more than one thread a ``schwimmbad.MultiPool`` is built whose
    workers are initialised through ``init(self.likelihood, self.priors)``;
    otherwise ``self.pool`` is set to ``None`` (serial mode).
    """
    if not self.threads > 1:
        # Serial mode: no pool at all.
        self.pool = None
        return

    import schwimmbad

    logger.info("Creating MultiPool with {} processes".format(self.threads))
    worker_args = (self.likelihood, self.priors)
    self.pool = schwimmbad.MultiPool(
        self.threads, initializer=init, initargs=worker_args
    )
def pread(self, args):
    """Read one variable on several tiles in parallel with ``read_grid``.

    Args:
        args: ``(varname, tiles)`` pair. For each tile the reader is looked
            up as ``self.readers[self.subdmap[tile]]``.

    Returns:
        List with one ``read_grid`` result per tile, in tile order.
    """
    varname, tiles = args

    tasks = [(self.readers[self.subdmap[tile]], varname, tile)
             for tile in tiles]

    pool = schwimmbad.MultiPool(processes=self.nthreads)
    # Release the workers even if a read fails.
    try:
        data = pool.map(read_grid, tasks)
    finally:
        pool.close()

    return data
def get_all(dataset, tag):
    """Call ``get_data`` for every simulation listed in the weights file.

    One simulation index (``0..N-1``) is generated per row of
    ``weight_files/weights_grid.txt``; the calls run on a 12-process
    ``MultiPool``.

    Args:
        dataset: forwarded to ``get_data`` as ``dataset``.
        tag: forwarded to ``get_data`` as ``tag``.

    Returns:
        ``np.ndarray`` built from the per-simulation results.
    """
    df = pd.read_csv('weight_files/weights_grid.txt')
    weights = np.array(df['weights'])

    sims = np.arange(0, len(weights))

    calc = partial(get_data, tag=tag, dataset=dataset)

    pool = schwimmbad.MultiPool(processes=12)
    # Release the workers even if a simulation fails.
    try:
        dat = np.array(list(pool.map(calc, sims)))
    finally:
        pool.close()

    return dat
def get_line_all(tag, line, inp='FLARES', LF=True,
                 bins=np.arange(40, 46, 0.5), Type='DustModelI'):
    """Collect ``get_line`` results for all simulations, optionally weighted.

    For ``inp == 'FLARES'`` one ``get_line`` call is made per row of
    ``weight_files/weights_grid.txt`` on a 12-process ``MultiPool``. For any
    other ``inp`` a single call with simulation index 0 is made and its
    result is returned as is.

    Returns:
        * FLARES with ``LF`` true: ``(out, hist, err)`` where ``hist`` is the
          unweighted sum over simulations, ``out`` the weight-summed result
          and ``err`` the square root of the summed
          ``(sqrt(dat) * weight) ** 2`` terms.
        * FLARES with ``LF`` false: the raw per-simulation array.
        * otherwise: the ``get_line`` return value.
    """
    if inp != 'FLARES':
        return get_line(0, tag=tag, line=line, inp=inp, LF=LF, bins=bins,
                        Type=Type)

    df = pd.read_csv('weight_files/weights_grid.txt')
    weights = np.array(df['weights'])

    sims = np.arange(0, len(weights))

    calc = partial(get_line, tag=tag, line=line, inp=inp, LF=LF, bins=bins,
                   Type=Type)

    pool = schwimmbad.MultiPool(processes=12)
    # Release the workers even if a simulation fails.
    try:
        dat = np.array(list(pool.map(calc, sims)))
    finally:
        pool.close()

    if not LF:
        return dat

    hist = np.sum(dat, axis=0)
    out = np.zeros(len(bins) - 1)
    err = np.zeros(len(bins) - 1)
    for ii in range(len(sims)):
        out += dat[ii] * weights[ii]
        err += np.square(np.sqrt(dat[ii]) * weights[ii])

    return out, hist, np.sqrt(err)
def mypool(func, tasks, nworkers=0, maxworkers=31):
    """Map ``func`` over ``tasks`` on a ``MultiPool`` and return the results.

    Args:
        func: callable; it is wrapped in ``Unpack`` before being mapped.
        tasks: sequence of task arguments (must support ``len``).
        nworkers (int): worker count; ``0`` means
            ``min(len(tasks), maxworkers)``.
        maxworkers (int): cap used when ``nworkers`` is 0.

    Returns:
        List of results in task order.
    """
    if nworkers == 0:
        nworkers = min(len(tasks), maxworkers)

    # Background on wrapping the callable for pickling:
    # https://stackoverflow.com/questions/62186218/python-multiprocessing-attributeerror-cant-pickle-local-object
    pool = schwimmbad.MultiPool(processes=nworkers + 1)
    # Release the workers even if a task fails.
    try:
        data = pool.map(Unpack(func), tasks)
    finally:
        pool.close()

    return data
def convert_fulltar(date, subd):
    """Extract and convert every tile of ``subd`` for one ``date``.

    Tiles are the keys of ``giga.subdmap`` whose value equals ``subd``. The
    destination directory and an empty binary file are prepared first; the
    conversion runs on a ``MultiPool`` sized from the module-level
    ``nworkers``; ``cleantar`` is called at the end.
    """
    tasks = [(date, subd, t) for t, s in giga.subdmap.items() if s == subd]

    create_destdir(date, subd)
    allocate_empty_binfile(date, subd)

    pool = schwimmbad.MultiPool(processes=nworkers + 1)
    # Release the workers even if a tile fails.
    try:
        tic = time.time()
        pool.map(task_extract_convert, tasks)
        toc = time.time()
        elapsed = toc - tic
        print(
            f"time to convert {len(tasks)} tiles with {nworkers} workers: {elapsed:.4} s"
        )
    finally:
        pool.close()

    cleantar(date, subd)
def get_all(tag, inp='FLARES', filter='FUV', Luminosity='DustModelI'):
    """Call ``get_data`` for simulations 0..39 on a 12-process ``MultiPool``.

    Only ``inp == 'FLARES'`` is handled; all arguments are forwarded to
    ``get_data`` as keywords.

    Returns:
        ``np.ndarray`` built from the 40 per-simulation results.
    """
    # NOTE(review): the original layout does not show whether ``return dat``
    # sat inside this ``if``; it is kept inside, so any other ``inp`` yields
    # ``None`` -- confirm against callers.
    if inp == 'FLARES':
        sims = np.arange(0, 40)

        calc = partial(get_data, tag=tag, inp=inp, filter=filter,
                       Luminosity=Luminosity)

        pool = schwimmbad.MultiPool(processes=12)
        # Release the workers even if a simulation fails.
        try:
            dat = np.array(list(pool.map(calc, sims)))
        finally:
            pool.close()

        return dat
def multiproc_vca(subcube_locs, channels, output_loc, fig_loc, dimensions):
    """Run ``do_vca`` on every sub-cube of a ``dimensions`` x ``dimensions`` grid.

    ``subcube_locs`` must be a list containing the string preceding the
    dimensions and subcube details, e.g. everything before
    '..._7x7_x1_y2.fits'; it can hold multiple prefixes if needed.
    ``output_loc`` and ``fig_loc`` are lists of prefixes expanded the same
    way (without the '.fits' suffix).

    ``channels`` should be input as a list and be factors of the total
    channel range in decreasing order, e.g. [32,16,8,4,2,1]. The same
    ``channels`` object is attached to every task.

    Nothing is returned; the inputs and the collected results are printed.
    """

    def _grid_names(prefixes, suffix=''):
        # One name per (prefix, y, x) combination, x varying fastest.
        names = []
        for prefix in prefixes:
            for y in np.arange(0, dimensions):
                for x in np.arange(0, dimensions):
                    names.append(
                        f'{prefix}_{dimensions}x{dimensions}_x{x}_y{y}{suffix}')
        return names

    with schwimmbad.MultiPool() as pool:
        print('started multi processing')
        print(datetime.datetime.now())

        # create the lists for multiprocessing
        vcacube = _grid_names(subcube_locs, '.fits')
        chansamps = [
            channels
            for _y in np.arange(0, dimensions)
            for _prefix in subcube_locs
            for _x in np.arange(0, dimensions)
        ]
        arrayloc = _grid_names(output_loc)
        figloc = _grid_names(fig_loc)

        inputs = list(zip(vcacube, chansamps, arrayloc, figloc))
        print(f'THESE ARE THE INPUTS FOR MULTIPROCESSING:{inputs}')

        out = list(pool.map(do_vca, inputs))

        print('finished multiprocessing')
        print(datetime.datetime.now())
        print(out)
def get_lum_all(kappa, tag, BC_fac, IMF='Chabrier_300',
                bins=np.arange(-24, -16, 0.5), inp='FLARES', LF=True,
                filters=('FAKE.TH.FUV'), Type='Total', log10t_BC=7.,
                extinction='default', orientation="sim", numThreads=8,
                masslim=None):
    """Collect ``get_lum`` results for all simulations, optionally weighted.

    For ``inp == 'FLARES'`` one ``get_lum`` call is made per row of
    ``../weight_files/weights_grid.txt`` on a ``MultiPool`` with
    ``numThreads`` processes. For any other ``inp`` a single call with
    simulation index 0 is made and its result is returned as is. All other
    keyword arguments are forwarded unchanged to ``get_lum``.

    Returns:
        * FLARES with ``LF`` true: ``(out, hist, err)`` where ``hist`` is the
          unweighted sum over simulations, ``out`` the weight-summed result
          and ``err`` the square root of the summed
          ``(sqrt(dat) * weight) ** 2`` terms.
        * FLARES with ``LF`` false: the raw per-simulation array.
        * otherwise: the ``get_lum`` return value.
    """
    # NOTE(review): the default ``filters=('FAKE.TH.FUV')`` is a plain string,
    # not a 1-tuple (no trailing comma) -- confirm which one get_lum expects.
    print(f"Getting luminosities for tag {tag} with kappa={kappa}")

    if inp != 'FLARES':
        return get_lum(0, kappa=kappa, tag=tag, BC_fac=BC_fac, IMF=IMF,
                       bins=bins, inp=inp, LF=LF, filters=filters, Type=Type,
                       log10t_BC=log10t_BC, extinction=extinction,
                       orientation=orientation, masslim=masslim)

    df = pd.read_csv('../weight_files/weights_grid.txt')
    weights = np.array(df['weights'])

    sims = np.arange(0, len(weights))

    calc = partial(get_lum, kappa=kappa, tag=tag, BC_fac=BC_fac, IMF=IMF,
                   bins=bins, inp=inp, LF=LF, filters=filters, Type=Type,
                   log10t_BC=log10t_BC, extinction=extinction,
                   orientation=orientation, masslim=masslim)

    pool = schwimmbad.MultiPool(processes=numThreads)
    # Release the workers even if a simulation fails.
    try:
        dat = np.array(list(pool.map(calc, sims)))
    finally:
        pool.close()

    if not LF:
        return dat

    hist = np.sum(dat, axis=0)
    out = np.zeros(len(bins) - 1)
    err = np.zeros(len(bins) - 1)
    for ii in range(len(sims)):
        err += np.square(np.sqrt(dat[ii]) * weights[ii])
        out += dat[ii] * weights[ii]

    return out, hist, np.sqrt(err)
def choose_pool(parallel):
    """
    This is a remake of schwimmbad.choose_pool with a single argument.

    Parameters
    ----------
    parallel : object with a ``map`` attribute, None, int, or str
        * object with ``map``: returned unchanged (user-defined pool).
        * ``None``: a ``NonePool`` (serial calculation).
        * int: ``schwimmbad.MultiPool`` with that many processes.
        * ``'all'``: ``MultiPool`` with ``os.cpu_count()`` processes.
        * ``'mpi'``: ``schwimmbad.MPIPool``; non-master processes wait for
          work and then exit the interpreter.
        * ``'auto'``: ``'mpi'`` if ``MPIPool.enabled()``, else ``'all'``.

    Raises
    ------
    DependencyMissing
        If schwimmbad is needed but ``NO_SCHWIMMBAD`` is set.
    TypeError
        If ``parallel`` is none of the accepted values.
    """
    # TODO: This function should be refactored as a factory class with methods
    # to enable more thorough testing of imports, MPI behaviour, etc.
    if hasattr(parallel, 'map'):
        # user-defined pool
        pool = parallel
    elif parallel is None:
        # serial calculation - define dummy pool
        pool = NonePool()
    elif NO_SCHWIMMBAD:
        raise DependencyMissing(
            'schwimmbad',
            "To perform inference calculations in parallel, install schwimmbad"
            " with \'conda install -c conda-forge schwimmbad\' or define your "
            "Strategy object with a 'parallel' keyword argument that is a "
            "multiprocessing.Pool object. To run serial calculations instead, "
            "pass in parallel=None.")
    elif isinstance(parallel, int):
        pool = schwimmbad.MultiPool(parallel)
    # Strings are compared with ``==``: ``is`` only worked when the caller's
    # string happened to be the same interned object as the literal.
    elif parallel == 'all':
        threads = os.cpu_count()
        pool = choose_pool(threads)
    elif parallel == 'mpi':
        pool = schwimmbad.MPIPool()
        # need to kill all non-master instances of currently running script
        if not pool.is_master():
            pool.wait()
            sys.exit(0)
    elif parallel == 'auto':
        # try mpi, otherwise go for multiprocessing
        if schwimmbad.MPIPool.enabled():
            pool = choose_pool('mpi')
        else:
            pool = choose_pool('all')
    else:
        raise TypeError("Could not interpret 'parallel' argument. Use an "
                        "integer, 'mpi', 'all', 'auto', None or pass a pool "
                        "object with 'map' method.")
    return pool
def get_data_all(tag, dataset='Mstar_30', bins=np.arange(7.5, 12, 0.5),
                 inp='FLARES', DF=False):
    """Collect ``get_data`` results for all simulations, optionally weighted.

    For ``inp == 'FLARES'`` one ``get_data`` call is made per row of
    ``weight_files/weights_grid.txt`` on a 12-process ``MultiPool``. For any
    other ``inp`` a single call with simulation index 0 is made and its
    result is returned as is.

    Returns:
        * FLARES with ``DF`` true: ``(out, hist, err)`` where ``hist`` is the
          unweighted sum over simulations, ``out`` the weight-summed result
          and ``err`` the square root of the summed
          ``(sqrt(dat) * weight) ** 2`` terms.
        * FLARES with ``DF`` false: the raw per-simulation array.
        * otherwise: the ``get_data`` return value.
    """
    if inp != 'FLARES':
        # ``dataset`` used to be dropped here, so a non-default dataset was
        # silently ignored for non-FLARES inputs.
        return get_data(0, tag=tag, dataset=dataset, bins=bins, inp=inp,
                        DF=DF)

    df = pd.read_csv('weight_files/weights_grid.txt')
    weights = np.array(df['weights'])

    sims = np.arange(0, len(weights))

    calc = partial(get_data, tag=tag, dataset=dataset, bins=bins, inp=inp,
                   DF=DF)

    pool = schwimmbad.MultiPool(processes=12)
    # Release the workers even if a simulation fails.
    try:
        dat = np.array(list(pool.map(calc, sims)))
    finally:
        pool.close()

    if not DF:
        return dat

    hist = np.sum(dat, axis=0)
    out = np.zeros(len(bins) - 1)
    err = np.zeros(len(bins) - 1)
    for ii in range(len(sims)):
        out += dat[ii] * weights[ii]
        err += np.square(np.sqrt(dat[ii]) * weights[ii])

    return out, hist, np.sqrt(err)
def partial(self, subd, tiles, dates, nworkers=31):
    """Extract and convert a chosen subset of tiles for several dates.

    Destination directories and empty binary files are prepared for each
    date, every ``(date, subd, tile)`` combination is handed to
    ``self.task_extract_convert`` on a ``MultiPool``, and ``self.cleantar``
    is called for each date afterwards.

    Args:
        subd: sub-domain identifier forwarded to all helpers.
        tiles (list): tiles to convert.
        dates (list): dates to convert.
        nworkers (int): the pool is created with ``nworkers + 1`` processes.
    """
    assert isinstance(dates, list)
    assert isinstance(tiles, list)

    for date in dates:
        self.create_destdir(date, subd)
        self.allocate_empty_binfile(date, subd)

    tasks = [(date, subd, t) for t in tiles for date in dates]

    pool = schwimmbad.MultiPool(processes=nworkers + 1)
    # Release the workers even if a tile fails.
    try:
        tic = time.time()
        pool.map(self.task_extract_convert, tasks)
        toc = time.time()
        elapsed = toc - tic
        print(
            f"time to convert {len(tasks)} tiles with {nworkers} workers: {elapsed:.4} s"
        )
    finally:
        pool.close()

    for date in dates:
        self.cleantar(date, subd)
def readalltiles(reader, varname, hour, date, parallel=False):
    """Read one variable on every tile of ``reader`` and mosaic the tiles.

    Each tile is a ``140 x 105`` block; tile number ``t`` is placed at block
    row ``t // 100`` and block column ``t % 100`` of a
    ``(14000, 10500)`` array.

    Args:
        reader: object exposing ``tiles`` and ``read``.
        varname, hour, date: forwarded to ``reader.read``.
        parallel (bool): read the tiles on a 17-process ``MultiPool``
            instead of sequentially.

    Returns:
        ``np.ndarray`` of shape ``(14000, 10500)``.
    """
    ny, nx = 140, 105
    data = np.zeros((ny * 100, nx * 100))
    ntiles = len(reader.tiles)

    if parallel:
        nworkers = 16
        pool = schwimmbad.MultiPool(processes=nworkers + 1)
        # Release the workers even if a read fails.
        try:
            # NOTE(review): here ``reader.read`` receives ONE tuple argument,
            # while the serial branch passes four positional arguments --
            # confirm that ``read`` supports both call shapes.
            tasks = [(varname, tile, hour, date) for tile in reader.tiles]
            chunk = pool.map(reader.read, tasks)
        finally:
            pool.close()
    else:
        chunk = np.zeros((ntiles, ny, nx))
        for k, tile in enumerate(reader.tiles):
            # ``subd`` is not a parameter; it is read from module scope.
            print(f"\rsubd:{subd:02} tile:{tile}", end="")
            chunk[k] = reader.read(varname, tile, hour, date)

    for k, tile in enumerate(reader.tiles):
        j0, i0 = ny * (tile // 100), nx * (tile % 100)
        data[j0:j0 + ny, i0:i0 + nx] = chunk[k]

    return data
def get_all(tag, bins=np.arange(-25, -16, 0.5), inp='GEAGLE', filter='FUV',
            Luminosity='Dustcorr'):
    """Collect ``get_hist`` histograms for all simulations and weight them.

    For ``inp == 'GEAGLE'`` ``get_hist`` is called for simulations 0..37 on a
    12-process ``MultiPool`` and combined with the weights read from
    ``weight_files/weights_grid.txt``. For any other ``inp`` a single
    ``get_hist(0, tag, bins, inp='REF')`` call is made.

    Returns:
        ``(out, hist)`` for GEAGLE, where ``hist`` is the unweighted sum over
        simulations and ``out`` the weight-summed result; otherwise the
        ``get_hist`` return value.
    """
    if inp != 'GEAGLE':
        return get_hist(0, tag, bins, inp='REF')

    sims = np.arange(0, 38)

    df = pd.read_csv('weight_files/weights_grid.txt')
    weights = np.array(df['weights'])

    calc = partial(get_hist, tag=tag, bins=bins, inp=inp, filter=filter,
                   Luminosity=Luminosity)

    pool = schwimmbad.MultiPool(processes=12)
    # Release the workers even if a simulation fails.
    try:
        dat = np.array(list(pool.map(calc, sims)))
    finally:
        pool.close()

    hist = np.sum(dat, axis=0)
    out = np.zeros(len(bins) - 1)
    for ii in range(len(sims)):
        out += dat[ii] * weights[ii]

    return out, hist
import schwimmbad
import numpy as np


def func(i):
    '''A useless function: print ``i + 1`` and return ``i`` unchanged.'''
    print(str(i + 1))
    return i


# Guard the pool creation: with the "spawn" start method each worker
# re-imports this module, and unguarded module-level pool code would then be
# executed again inside every worker.
if __name__ == "__main__":
    # Use multipool - same as multiprocessing
    with schwimmbad.MultiPool() as pool:
        inputs = list(np.arange(0, 10, 2))
        out1 = list(pool.map(func, inputs))

    # Use serial pool
    with schwimmbad.SerialPool() as pool:
        inputs = list(np.arange(10, 20, 2))
        out2 = list(pool.map(func, inputs))

    print(out1, out2)
except: print (F"No data available in {ii}") return np.array([]), np.array([]), np.array([]), np.array([]), np.array([]), np.array([]) tag='010_z005p000' z = float(tag[5:].replace('p','.')) sim_type='FLARES' inp = int(sys.argv[1]) # mstar, lfuv, lfuv_int, att, begin, end, slen = get_data(0, tag, inp = 'FLARES') func = partial(get_data, tag=tag, inp = 'FLARES') pool = schwimmbad.MultiPool(processes=8) dat = np.array(list(pool.map(func, np.arange(0,40)))) pool.close() for ii in range(40): if ii==0: mstar = dat[ii][0] lfuv = dat[ii][1] lfuv_int = dat[ii][2] slen = dat[ii][-1] num = np.append(np.array([0]), len(dat[ii][0])) part_num = np.arange(0, len(dat[ii][0])) else: mstar = np.append(mstar, dat[ii][0]) lfuv = np.append(lfuv, dat[ii][1]) lfuv_int = np.append(lfuv_int, dat[ii][2])
def featurize_data(pbmap, pbnames, lcdata, metadata, nobjects, featurefile):
    """
    ***Feature extractor for PLaSTiCC***

    Extracts features from data by some Cesium library functions.
    Builds a timeseries dictionary and for each time series extracts
    features. Features described in file: feature_sets.

    If ``featurefile`` already exists the feature set is loaded from it;
    otherwise features are computed with ``worker`` on a MultiPool, imputed
    and saved to ``featurefile``.

    Args:
        pbmap: mapping iterated for passband identifiers and queried with
            ``.get`` to build the output column names.
        pbnames: channel names passed to every ``TimeSeries``.
        lcdata: light-curve table with 'object_id', 'passband', 'mjd',
            'flux' and 'flux_err' columns.
        metadata: per-object table indexed by row number; the columns read
            are 'object_id', 'target', 'zBand', 'hostgal_photoz',
            'hostgal_photoz_err', 'magnitude', 'u-b' and 'b-v'.
        nobjects: number of rows of ``metadata`` to process.
        featurefile: path of the feature-set cache file.

    Returns:
        pandas DataFrame of the features plus a 'target' column.

    Created on Mon Apr 29 19:30:52 2019

    @author: luisarribas
    """
    print("")
    print("EXTRACTING FEATURES")
    print("===================")
    print("")
    print("Building Timeseries....wait")
    print("===========================")
    #**********************BUILD TIME SERIES**********************************
    # One TimeSeries per object, keyed by object id, in metadata row order.
    tsdict = OrderedDict()
    for i in range(nobjects):
        row = metadata[i]
        thisid = row['object_id']
        target = row['target']

        meta = {'zBand':row['zBand'],\
                'z':row['hostgal_photoz'],\
                'zerr':row['hostgal_photoz_err'],\
                'mag':row['magnitude'],\
                'u-b':row['u-b'],\
                'b-v':row['b-v']
                }

        # Select this object's light-curve rows.
        ind = (lcdata['object_id'] == thisid)
        thislc = lcdata[ind]

        # One boolean mask per passband, then one time/flux/error array per
        # passband (one channel each).
        pbind = [(thislc['passband'] == pb) for pb in pbmap]
        t = [thislc['mjd'][mask].data for mask in pbind]
        m = [thislc['flux'][mask].data for mask in pbind]
        e = [thislc['flux_err'][mask].data for mask in pbind]

        tsdict[thisid] = TimeSeries(t=t, m=m, e=e,\
                                    label=target, name=thisid, meta_features=meta,\
                                    channel_names=pbnames )

    print("")
    print("OK!")
    print(" ")

    #***********************FEATURE EXTRACTION WITH CESIUM********************
    # Silences all warnings from this point on.
    warnings.simplefilter('ignore')
    if os.path.exists(featurefile):
        # Cached feature set available: load it instead of recomputing.
        print("")
        print("Loading features from file....wait")
        print("==================================")
        featuretable, _ = featurize.load_featureset(featurefile)
        print("")
        print("OK!")
        print(" ")
    else:
        features_list = []
        print("")
        print("Computing features....wait")
        print("==========================")
        with schwimmbad.MultiPool() as pool:
            # imap is consumed inside the ``with`` block, while the pool is
            # still alive.
            results = pool.imap(worker, list(tsdict.values()))
            for res in results:
                features_list.append(res)

        featuretable = featurize.assemble_featureset(features_list=features_list,\
                                  time_series=tsdict.values())
        # Imputation is done in place on ``featuretable`` before it is saved.
        featurize.impute_featureset(fset=featuretable, strategy='constant', value=0, max_value=18446744073709551000, inplace=True)
        featurize.save_featureset(fset=featuretable, path=featurefile)
        print("")
        print("OK!")
        print(" ")

    #*******Build Pandas dataframe output*************************************
    # Column keys unpack into (feature name, channel) pairs; channels missing
    # from ``pbmap`` are labelled 'meta'.
    old_names = featuretable.columns.values
    new_names = ['{}_{}'.format(x, pbmap.get(y, 'meta')) for x, y in old_names]
    cols = [featuretable[col] for col in old_names]
    allfeats = Table(cols, names=new_names, masked=False)
    allfeats['target'] = metadata['target']
    allfeats = allfeats.to_pandas()
    # nan_to_num is applied to the whole frame, then the table is rebuilt
    # with the column names (now including 'target').
    allfeats = np.nan_to_num(allfeats)
    new_names.append('target')
    allfeats = Table(allfeats, names=new_names, masked=False)
    allfeats = allfeats.to_pandas()
    print("")
    print("Extracted features = ", len(allfeats.columns))
    print("==========================")
    print("")
    print("Nan Values detected = ", sum(len(allfeats) - allfeats.count()))
    print("==========================")

    return allfeats
def line_write_out(num, lines, tag, kappa, BC_fac, label, inp='FLARES',
                   LF=False, log10t_BC=7., Type='Total',
                   extinction='default', data_folder='data', aperture='30'):
    """Compute line luminosities and EWs and write them to the HDF5 file.

    ``get_lines`` is evaluated once per entry of ``lines`` on an 8-process
    ``MultiPool``; for every line the first result is written as
    ``<line>/Luminosity`` and the second as ``<line>/EW``.

    Args:
        num: simulation number (FLARES only; single characters are
            zero-padded to two). Forced to ``'00'`` for 'REF'/'AGNdT9'.
        lines: iterable of line identifiers.
        tag: snapshot tag; also the top-level group of the output.
        kappa, BC_fac, log10t_BC, extinction, aperture: forwarded to
            ``get_lines``.
        label: sub-group name used for ``Type`` 'Intrinsic' and 'Only-BC'.
        inp: 'FLARES', 'REF' or 'AGNdT9'; selects the output file.
        LF: accepted for interface compatibility; ``get_lines`` is always
            called with ``LF=False``.
        Type: 'Total', 'Intrinsic' or 'Only-BC'. Any other value computes
            the lines but writes nothing.
        data_folder: directory that holds the output file.

    Raises:
        ValueError: if ``inp`` is not one of the supported options.
    """
    if inp == 'FLARES':
        num = str(num)
        if len(num) == 1:
            num = '0' + num
        filename = F"./{data_folder}/FLARES_{num}_sp_info.hdf5"
        sim_type = inp
    elif inp in ('REF', 'AGNdT9'):
        filename = F"./{data_folder}/EAGLE_{inp}_sp_info.hdf5"
        sim_type = 'PERIODIC'
        num = '00'
    else:
        # The exception used to be constructed but never raised, so an
        # unsupported ``inp`` fell through and failed later on undefined names.
        raise ValueError(F"No input option of {inp}")

    calc = partial(get_lines, sim=num, kappa=kappa, tag=tag, BC_fac=BC_fac,
                   inp=inp, IMF='Chabrier_300', LF=False,
                   log10t_BC=log10t_BC, Type=Type, extinction=extinction,
                   data_folder=data_folder, aperture=aperture)

    pool = schwimmbad.MultiPool(processes=8)
    # Release the workers even if a line fails.
    try:
        dat = np.array(list(pool.map(calc, lines)))
    finally:
        pool.close()

    # Destination group and dataset descriptions depend only on ``Type``.
    if Type == 'Total':
        group = F"{tag}/Galaxy/BPASS_2.2.1/Chabrier300/Lines/DustModelI"
        lum_desc = F"Dust corrected luminosity (using ModelI) of the galaxy with a birth cloud factor of {BC_fac} following {extinction} curve"
        ew_desc = F"EW (using ModelI) of the galaxy with a birth cloud factor of {BC_fac} following {extinction} curve"
    elif Type == 'Intrinsic':
        group = F"{tag}/Galaxy/BPASS_2.2.1/Chabrier300/Lines/{label}"
        lum_desc = F"Intrinsic line luminosity"
        ew_desc = F"Intrinsic EW"
    elif Type == 'Only-BC':
        group = F"{tag}/Galaxy/BPASS_2.2.1/Chabrier300/Lines/{label}"
        lum_desc = F"Intrinsic line luminosity with birth cloud factor {BC_fac} following {extinction} curve"
        ew_desc = F"Intrinsic EW with birth cloud factor {BC_fac} following {extinction} curve"
    else:
        group = lum_desc = ew_desc = None

    for ii, line in enumerate(lines):
        out_lum = dat[:, 0][ii]
        out_EW = dat[:, 1][ii]

        if group is None:
            # Unrecognised Type: nothing is written.
            continue

        fl = flares.flares(fname=filename, sim_type=sim_type)
        fl.create_group(group, verbose=True)
        print(F'{line} is being written to disk')
        fl.create_dataset(values=out_lum, name=F"{line}/Luminosity",
                          group=group, desc=lum_desc, unit="ergs/s",
                          overwrite=True)
        fl.create_dataset(values=out_EW, name=F"{line}/EW",
                          group=group, desc=ew_desc, unit="Angstrom",
                          overwrite=True)
err = [] # Initialize three list objects for generating RVData. for x in range(len(data)): # Assign values in corresponding columns to individual lists. t.append(data[x][0]) rv.append(data[x][1]) err.append(data[x][2]) rv = rv * u.km/u.s err = err * u.km/u.s return(t,rv,err) t,rv,err = readAndGenerateData() data = RVData(t=t, rv=rv, stddev=err) params = JokerParams(P_min=8*u.day, P_max=512*u.day) pool = schwimmbad.MultiPool() joker = TheJoker(params, pool=pool) samples = joker.rejection_sample(data, n_prior_samples=SAMPLE_SIZE) fig, ax = plt.subplots(1, 1, figsize=(6,6)) # doctest: +SKIP ax.scatter(samples['P'].value, samples['K'].to(u.km/u.s).value, marker='.', color='k', alpha=0.45) # doctest: +SKIP ax.set_xlabel("$P$ [day]") ax.set_ylabel("$K$ [km/s]") ax.set_xlim(-10, 200) ax.set_ylim(0.75,12.) ax.scatter(61.942, 1.3959, marker='o', color='#31a354', zorder=-100) fig, ax = plt.subplots(1, 1, figsize=(8,5)) # doctest: +SKIP
def main():
    """Sample the likelihood with emcee on a MultiPool and save trace plots.

    Walkers start in a small ball around a hard-coded parameter vector. The
    sampler runs for 1024 iterations, and every 32 iterations a trace plot
    is written to '../plots/pal5_emcee_trace.png'.
    """
    # Starting guess assembled from the cluster coordinates and the
    # potential's component masses (superseded by the array below).
    p0 = [
        cl_c.phi2.degree,
        cl_c.distance.kpc,
        cl_c.pm_phi1_cosphi2.value,
        cl_c.pm_phi2.value,
        cl_c.radial_velocity.value,
        np.log(mw['halo'].parameters['m'].value),
        1,
        np.log(mw['disk'].parameters['m'].value),
    ]
    # 11.1, 232.24, 7.25,

    # HACK: optimized values from below:
    p0 = np.array([
        3.64098650e-02, 20.3, 3.83442589e+00, 6.92904214e-01,
        -5.58127521e+01, 2.76771005e+01, 1.11980719e+00, 2.49729735e+01
    ])

    data_units = {
        'phi1': u.deg,
        'phi2': u.deg,
        'distance': u.kpc,
        'pm_phi1_cosphi2': u.mas / u.yr,
        'pm_phi2': u.mas / u.yr,
        'radial_velocity': u.km / u.s
    }

    # Extra variance per component, given as squared quantities.
    extra_var = {
        'phi2': (0.05 * u.deg)**2,
        'distance': (0.2 * u.kpc)**2,
        'pm_phi1_cosphi2': (0.1 * u.mas / u.yr)**2,
        'pm_phi2': (0.1 * u.mas / u.yr)**2,
        'radial_velocity': (1 * u.km / u.s)**2,
    }

    position_names = list(cl_c.get_representation_component_names().keys())
    velocity_names = list(cl_c.get_representation_component_names('s').keys())
    frame_comp_names = position_names + velocity_names

    # Strip the units: plain numbers in the squared data units.
    _extra_var = {
        name: var.to_value(data_units[name]**2)
        for name, var in extra_var.items()
    }

    args = (cl_c.phi1, mw, data, data_units, frame_comp_names, _extra_var)

    # Test likelihood call:
    ln_likelihood(p0, *args)

    # Run emcee
    ndim = len(p0)
    nwalkers = 128 * ndim
    p0s = emcee.utils.sample_ball(p0, std=[1e-3] * ndim, size=nwalkers)

    with schwimmbad.MultiPool() as pool:
        sampler = emcee.EnsembleSampler(nwalkers, ndim, ln_likelihood,
                                        args=args, pool=pool)

        for sample in sampler.sample(p0s, iterations=1024, progress=True):
            # Update plot every 32 steps
            if sampler.iteration % 32 == 0:
                fig = trace_plot(sampler)
                fig.savefig('../plots/pal5_emcee_trace.png', dpi=250)
                plt.close(fig)
#hisdates = [giga.hisdates[i+1+idate] for i in range(ndates)] hisdates = giga.hisdates[idate:] hisdates = ["2008-09-26"] regions = list(range(11, 14)) for date in hisdates: nworkers = 31 tasks = [] for subd in regions: tasks += [(date, subd, t) for t, s in giga.subdmap.items() if s == subd] create_destdir(date, subd) allocate_empty_binfile(date, subd) pool = schwimmbad.MultiPool(processes=nworkers + 1) tic = time.time() pool.map(task_extract_convert, tasks) toc = time.time() elapsed = toc - tic print( f"time to convert {len(tasks)} tiles with {nworkers} workers: {elapsed:.4} s" ) pool.close() cleantar(date, subd) exit() # time to extract all tiles from region 10 with 20 workers: 500.4 s (task_extract) nworkers = 20 tasks = []