def define_tiles():
    """Build the global tiling: seed the domain with two tiles split at
    midlon, refine them with setup_tiling under the maxload constraint,
    then write, apply and plot the resulting tiling."""
    argo = tools.read_argodb()
    midlon = (dright - dleft) / 2.
    tile = {}
    tile['ID'] = "0"
    tile['LONMIN'] = -180. - dleft  # -1./17
    tile['LONMAX'] = midlon  # 1./127.
    tile['LATMIN'] = -90. - dleft  # -1./17
    tile['LATMAX'] = 90. + dright  # +1./19
    tile0 = tile.copy()
    tile1 = tile.copy()
    tile1['ID'] = "1"
    tile1['LONMIN'] = midlon  # 1./127
    tile1['LONMAX'] = 180. + dright  # +1./17
    twotile = [(tile0, extract_in_tile(argo, tile0)),
               (tile1, extract_in_tile(argo, tile1))]
    tiles = setup_tiling(twotile, maxload)
    bb = convert_tiles_to_dict(tiles)
    write_tiles(bb)
    split_global_into_tiles(bb, argo)
    plot_tiles(bb, maxload)
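# Hedged sketch, not part of the original module: extract_in_tile is called
# above but defined elsewhere. Assuming the database DataFrame exposes
# LATITUDE and LONGITUDE columns (names guessed from Argo conventions, not
# confirmed here), it presumably selects the profiles falling inside a
# tile's bounding box, along these lines:
def _extract_in_tile_sketch(argo, tile):
    inside = ((argo.LONGITUDE >= tile['LONMIN'])
              & (argo.LONGITUDE < tile['LONMAX'])
              & (argo.LATITUDE >= tile['LATMIN'])
              & (argo.LATITUDE < tile['LATMAX']))
    return argo[inside]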
def create_empty_netcdf_file(ncfile, lat, lon, set_latest=False):
    """Create an empty NetCDF file on the (depth, lat, lon) grid, with one
    fill-valued variable per entry of stats.var_stats."""
    with Dataset(ncfile, "w") as nc:
        nc.createDimension("lon", len(lon))
        nc.createDimension("lat", len(lat))
        nc.createDimension("depth", len(tools.zref))

        v = nc.createVariable("lon", float, ("lon",))
        v.long_name = "longitude"
        v.units = "degrees_east"

        v = nc.createVariable("lat", float, ("lat",))
        v.long_name = "latitude"
        v.units = "degrees_north"

        v = nc.createVariable("depth", float, ("depth",))
        v.long_name = "depth"
        v.units = "m"
        v.positive = "down"

        if set_latest:
            argo = tools.read_argodb()
            attributes = set_latest_argo_profile(argo)
        else:
            attributes = global_attributes

        # replace whitespaces with underscores in attribute names
        d = {}
        for key, val in attributes.items():
            d[key.replace(" ", "_")] = val
        nc.setncatts(d)

        for var in stats.var_stats:
            at = stats.attributes[var]  # (long_name, units, (vmin, vmax))
            v = nc.createVariable(var, np.float64, ("depth", "lat", "lon"),
                                  fill_value=fill_value)
            v.long_name = at[0]
            v.units = at[1]
            v.valid_min = at[2][0]
            v.valid_max = at[2][1]

    # reopen in append mode to fill the coordinate variables
    with Dataset(ncfile, "a") as nc:
        nc.variables["lon"][:] = lon
        nc.variables["lat"][:] = lat
        nc.variables["depth"][:] = tools.zref
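# Usage sketch (hypothetical grid and file name): create an empty global
# file on a half-degree grid. The depth axis is taken from tools.zref inside
# the function, so only lat and lon need to be supplied.
def _example_create_empty_file():
    import numpy as np
    lat = np.arange(-90., 90.5, 0.5)
    lon = np.arange(-180., 180., 0.5)
    create_empty_netcdf_file("argo_stats_example.nc", lat, lon,
                             set_latest=False)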
def update_with_new_wmos():
    """Detect the WMOs returned by tools.get_all_wmos() that are not yet in
    the local database and fold them in."""
    dacs, wmos = tools.get_all_wmos()
    # if debug:
    #     dacs = dacs[::100]
    #     wmos = wmos[::100]
    argo = tools.read_argodb()
    all_wmos = set(wmos)
    known_wmos = set(argo.WMO)
    new = all_wmos - known_wmos  # a set, for O(1) membership tests below
    new_dacs = [d for d, w in zip(dacs, wmos) if w in new]
    new_wmos = [w for w in wmos if w in new]
    if len(new_wmos) > 200:
        raise ValueError("Too many new wmos, update manually")
    a = tools.update_argodb(argo, new_dacs, new_wmos)
    tools.write_argodb(a)
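# The new-WMO selection above, in isolation on synthetic values: the set
# difference finds the unknown WMOs and zip keeps each one paired with its
# DAC. The WMO numbers below are made up for illustration.
def _example_new_wmo_selection():
    dacs = ["coriolis", "coriolis", "aoml"]
    wmos = [6901000, 6901001, 4900800]
    known_wmos = {6901000}
    new = set(wmos) - known_wmos
    pairs = [(d, w) for d, w in zip(dacs, wmos) if w in new]
    print(pairs)  # [('coriolis', 6901001), ('aoml', 4900800)]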
def master_job(nslaves, resume=False):
    # define the master director
    master = mns.Master(nslaves)
    argo = tools.read_argodb()
    bb = tiles.read_tiles()
    keys = list(bb.keys())
    work = workload(bb)
    if resume:
        # restart: keep only the tiles whose CT profiles file is missing
        keys = [k for k in keys
                if not os.path.exists(interp.tiles_profiles_file
                                      % (interp.var_dir["CT"], k))]
    # schedule the heaviest tiles first
    weight = [work[k] for k in keys]
    tasks = np.argsort(weight)[::-1]
    # tiles.split_global_into_tiles(bb, argo)
    print(tasks)
    pd.to_pickle(keys, file_tiles_to_interpolate)
    # master defines the tasks
    master.barrier(0)
    # slaves work
    master.async_distrib(tasks)
    master.barrier(1)
    # gather DataFrame
    tiles.gather_global_from_tiles()
    # gather profiles
    interp.gather_global_from_tiles()
    # master gathers the dataframes
    master.barrier(2)
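# The heaviest-first ordering used above, in isolation with synthetic
# weights: np.argsort sorts ascending, so reversing the index array sends
# the most expensive tiles out first, which balances the slaves' load.
def _example_heaviest_first():
    import numpy as np
    work = {"a": 10, "b": 500, "c": 42}
    keys = list(work)
    weight = [work[k] for k in keys]
    tasks = np.argsort(weight)[::-1]
    print([keys[t] for t in tasks])  # ['b', 'c', 'a']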
def define_tasks(resume=False):
    argo = tools.read_argodb()
    bb = tiles.read_tiles()
    keys = list(bb.keys())
    work = workload(bb)
    if resume:
        # restart: keep only the tiles whose stats file is missing
        d = stats.var_dir["CT"]
        keys = [k for k in keys
                if not os.path.exists(stats.tiles_file % (d, k))]
    # schedule the heaviest tiles first
    weight = [work[k] for k in keys]
    idx = np.argsort(weight)
    tasks = list(idx[::-1])
    tile_list = [keys[t] for t in tasks]  # kept for the debug prints below
    # print(tasks)
    # print(tile_list)
    return (tasks, keys)
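# Typical call site (a sketch): the returned tasks are integer indices into
# keys, ordered heaviest first, so keys[t] recovers each tile identifier.
def _example_define_tasks():
    tasks, keys = define_tasks(resume=True)
    for t in tasks[:3]:
        print("next tile:", keys[t])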
def master_job(nslaves, debug=False):
    # define the master director
    master = mns.Master(nslaves)
    init.init()
    dacs, wmos = tools.get_all_wmos()
    # if debug:
    #     dacs = dacs[::100]
    #     wmos = wmos[::100]
    argo = tools.read_argodb()
    known_wmos = set(argo.WMO)
    if len(wmos) > len(known_wmos):
        update_with_new_wmos()
    nwmos = len(wmos)
    assert nwmos >= nslaves
    print("number of wmos: %i" % nwmos)
    # define tasks
    print("define the tasks")
    if False:
        # disabled: static splitting of the wmos into nslaves equal chunks
        task_size = nwmos // nslaves
        for itask in range(nslaves):
            istr = itask * task_size
            iend = istr + task_size
            if itask == nslaves - 1:
                iend = nwmos
            d = dacs[istr:iend]
            w = wmos[istr:iend]
            a = argo[argo.WMO.isin(w)]
            # a = pd.concat([argo[argo.WMO==x] for x in w])
            task = (a, d, w)
            print('task %02i : %i' % (itask, len(w)))
            f = task_file % itask
            pd.to_pickle(task, f)
    # master defines the tasks
    master.barrier(0)
    print("slaves are working")
    # slaves work
    master.barrier(1)
    print("gather the results")
    # # send tasks to slaves
    # for islave in range(nslaves):
    #     master.send(islave, islave)
    # gather the per-task DataFrames
    argos = []
    for itask in range(nslaves):
        f = result_file % itask
        assert os.path.exists(f)
        argos.append(pd.read_pickle(f))
    argo = pd.concat(argos)
    print("number of profiles in the database: %i" % len(argo))
    print("write argo_global.pkl")
    tools.write_argodb(argo)
    print("define tiles")
    tiles.define_tiles()
    # clean up workdir
    print("clean up %s" % work_dir)
    os.system("rm -Rf %s/*.pkl" % work_dir)
    # master gathers the dataframes
    master.barrier(2)
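# master_job relies on module-level path templates defined elsewhere in the
# package. The values below are hypothetical and only illustrate the
# `% itask` formatting; the real paths live in the package configuration.
#
# work_dir = "/path/to/workdir"                  # scratch area (assumption)
# task_file = work_dir + "/task_%02i.pkl"        # one pickle per task
# result_file = work_dir + "/result_%02i.pkl"    # one pickle per slave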
def single_proc_update():
    """Serial (single process) update of the whole database; returns the
    refreshed DataFrame without writing it."""
    dacs, wmos = tools.get_all_wmos()
    argo = tools.read_argodb()
    argo = tools.update_argodb(argo, dacs, wmos)
    return argo
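# Serial fallback usage (a sketch): unlike the master/slave path above,
# single_proc_update does not persist its result, so write it explicitly.
def _example_single_proc_update():
    argo = single_proc_update()
    tools.write_argodb(argo)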