def __init__(self,
             da_sequence,  # pil_image_generator TODO better using generator
             position=(0, 0),
             colours=_colour.ImageColours(),
             antialiasing=None,
             make_pil_images_now=False,
             multiprocessing=False):
    if not isinstance(colours, _colour.ImageColours):
        raise ValueError("Colours must be an ImageColours instance")
    self.da_sequence = da_sequence
    self.stimuli = []
    self.position = position
    self.antialiasing = antialiasing
    for da in self.da_sequence.dot_arrays:
        stim = ExprimentDotArray(dot_array=da,
                                 position=position,
                                 colours=colours,
                                 antialiasing=antialiasing)
        self.stimuli.append(stim)

    if make_pil_images_now:
        if not multiprocessing:
            list(map(lambda x: x._create_pil_image(), self.stimuli))
            self._make_image_process = None
        else:
            p = _Pool()
            for c, pil_im in enumerate(
                    p.imap(ExpyrimentDASequence._make_stimuli_map_helper,
                           self.stimuli)):
                self.stimuli[c]._image = pil_im
            p.close()
            p.join()
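# --- Hedged sketch (not part of the original source) ------------------------
# The `_make_stimuli_map_helper` passed to `Pool.imap` above is defined
# elsewhere in the class. Pool.imap cannot pickle lambdas or bound methods,
# which is presumably why a class-level helper is used instead of the lambda
# from the serial branch. The class name and body below are assumptions only.
class ExpyrimentDASequenceSketch:
    @staticmethod
    def _make_stimuli_map_helper(stimulus):
        stimulus._create_pil_image()   # assumed to render and store the PIL image
        return stimulus._image         # returned to the parent process by imap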
def gd2e(gd2e_table, project_name, num_cores, tqdm, cache_path):
    '''We should ignore stations_list as we already selected stations within merge_table'''
    # try:
    if gd2e_table[gd2e_table['file_exists'] == 0].shape[0] == 0:
        print('{} already processed'.format(project_name))
    else:
        # converting to records in order for mp to work properly as it doesn't work with a pandas DataFrame
        gd2e_table = gd2e_table[gd2e_table['file_exists'] == 0].to_records()
        num_cores = num_cores if gd2e_table.shape[0] > num_cores else gd2e_table.shape[0]
        print('Processing {} | # files left: {} | Adj. # of threads: {}'.format(
            project_name, gd2e_table.shape[0], num_cores))
        with _Pool(processes=num_cores) as p:
            if tqdm:
                list(_tqdm.tqdm_notebook(p.imap(_gd2e, gd2e_table),
                                         total=gd2e_table.shape[0]))
            else:
                p.map(_gd2e, gd2e_table)  # investigate why list is needed
    # except:
    print('cleaning IONEX from RAM as exiting')  # cleaning after execution
    IONEX_cached_path = _os.path.join(cache_path, 'IONEX_merged')
    _rmtree(IONEX_cached_path)
def _pool_map(n_core, parallel_function, n_sample_u, shape_latent, shared_latent):
    with _Pool(processes=n_core,
               initializer=_init_parallel,
               initargs=(shared_latent, shape_latent)) as pool:
        pool.map(parallel_function, iterable=_np.arange(n_sample_u))
    latent = _np.frombuffer(shared_latent, dtype=_np.float64).reshape(shape_latent)
    return latent
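# --- Hedged sketch (not part of the original source) ------------------------
# The pool initializer `_init_parallel` referenced above is not shown here.
# A minimal sketch, assuming it exposes the shared buffer to the workers via
# module-level globals (the usual pattern for multiprocessing shared memory):
_shared_latent = None
_shape_latent = None

def _init_parallel(shared_latent, shape_latent):
    # Store the shared buffer and its shape so that `parallel_function` can
    # build a NumPy view onto it inside each worker, e.g.
    #   _np.frombuffer(_shared_latent, dtype=_np.float64).reshape(_shape_latent)
    global _shared_latent, _shape_latent
    _shared_latent = shared_latent
    _shape_latent = shape_latent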
def _gather_tdps(station_files, num_cores, tqdm):
    '''Processing extraction in parallel get_tdps_pandas,numpy'''
    num_cores = num_cores if station_files.shape[0] > num_cores else station_files.shape[0]
    # chunksize = int(np.ceil(len(station_files) / num_cores))  # 20-30 is the best
    chunksize = 20
    with _Pool(processes=num_cores) as p:
        if tqdm:
            data = list(_tqdm.tqdm_notebook(
                p.imap(_get_tdps_npz, station_files, chunksize=chunksize),
                total=station_files.shape[0]))
        else:
            data = p.map(_get_tdps_npz, station_files, chunksize=chunksize)
    return data
def igs2jpl(begin, end, products_type, products_dir, tqdm, num_cores=None, run_dir='/run/user/1017/'):
    # products_dir = '/mnt/data/bogdanm/Products/CODE/source/MGEX/'
    sets = _gen_sets(begin, end, products_type, products_dir, run_dir=run_dir)
    sets = sets.to_records()
    with _Pool(num_cores) as p:
        if tqdm:
            list(_tqdm.tqdm_notebook(p.imap(_sp3ToPosTdp, sets), total=sets.shape[0]))
        else:
            p.map(_sp3ToPosTdp, sets)
    tmp_dir = _os.path.join(run_dir, 'tmp_igs2jpl')  # creating tmp directory processes will work in
    try:
        _rmtree(tmp_dir)  # clearing memory before processing
    except:
        print('Could not remove tmp')
def ce2cm(init_ce_path, num_cores=10, tqdm=True):
    cache = '/run/user/1017/'
    cache_path = _os.path.join(cache, 'ce2cm_cache')
    if not _os.path.exists(cache_path):
        _os.makedirs(cache_path)
    init_ce_path = _os.path.abspath(init_ce_path)
    cm_dirname = _os.path.basename(init_ce_path) + '_cm'
    init_cm_path = _os.path.join(_os.path.dirname(init_ce_path), cm_dirname)
    if _os.path.exists(init_cm_path):
        print('CM folder exists. Removing.')
        _rmtree(init_cm_path)
    print('Copying {} to {}'.format(_os.path.basename(init_ce_path), cm_dirname))
    # dst = _copytree(src=init_ce_path, dst=init_cm_path)
    print('Finished copying to {}'.format(init_cm_path))
    # pos_files = _glob.glob(init_cm_path + '/*/*pos.gz')
    # print('Found {} pos files. Running'.format(len(pos_files)))

    # files to make symlinks
    product_files = _pd.Series(_glob.glob(init_ce_path + '/*/*.gz'))
    product_file_names_df = product_files.str.split('/', expand=True).iloc[:, -1].str.split('.', expand=True)
    symlink_files = product_files[product_file_names_df[1] != 'pos'].to_list()
    # files to copy (.pos)
    pos_files = product_files[product_file_names_df[1] == 'pos'].to_list()

    basedir = _os.path.abspath(_os.path.join(symlink_files[0], _os.pardir, _os.pardir, _os.pardir))
    files_symlinks = _pd.Series(symlink_files).str.split('/', expand=True).iloc[:, -3:]
    symlink_src = (basedir + '/' + files_symlinks.iloc[:, 0] + '/'
                   + files_symlinks.iloc[:, 1] + '/' + files_symlinks.iloc[:, 2])
    symlink_dst = (basedir + '/' + files_symlinks.iloc[:, 0] + '_cm/'
                   + files_symlinks.iloc[:, 1] + '/' + files_symlinks.iloc[:, 2])

    year_dirs = basedir + '/' + files_symlinks.iloc[:, 0][0] + '_cm/' + files_symlinks.iloc[:, 1].unique()
    for dir_path in year_dirs:
        if not _os.path.exists(dir_path):
            _os.makedirs(dir_path)

    print('creating symlinks for products files (except for *.pos.gz)')
    for i in range(len(symlink_src)):
        _os.symlink(src=_os.path.relpath(path=symlink_src[i],
                                         start=_os.path.dirname(symlink_dst[i])),
                    dst=symlink_dst[i])

    files_pos = _pd.Series(pos_files).str.split('/', expand=True).iloc[:, -3:]
    pos_src = (basedir + '/' + files_pos.iloc[:, 0] + '/'
               + files_pos.iloc[:, 1] + '/' + files_pos.iloc[:, 2])
    pos_dst = (basedir + '/' + files_pos.iloc[:, 0] + '_cm/'
               + files_pos.iloc[:, 1] + '/' + files_pos.iloc[:, 2])

    cache_path_series = _np.ndarray(pos_src.shape, dtype=object)
    cache_path_series.fill(cache_path)
    pos_path_series = _pd.concat([pos_src, pos_dst, _pd.Series(cache_path_series)], axis=1).values
    # return pos_path_series
    with _Pool(processes=num_cores) as p:
        if tqdm:
            list(_tqdm.tqdm_notebook(p.imap(_ce2cm_single_thread, pos_path_series),
                                     total=len(pos_path_series)))
        else:
            p.map(_ce2cm_single_thread, pos_path_series)
    _rmtree(path=cache_path)
def dr_merge(merge_table, num_cores, tqdm):
    '''merge_table is the output of get_merge_table(). Merges everything that is of
    class 3, as merge_table stores only files that are actual.'''
    num_cores = int(num_cores)  # safety precaution in case a str value is specified
    df_class3 = merge_table[['begin', 'path_prev', 'path', 'path_next'
                             ]][merge_table['completeness'] == 3].copy()
    df_class3['merge_begin'] = (
        df_class3['begin'].astype('datetime64[D]') - _np.timedelta64(3, '[h]')
        - J2000origin).astype('timedelta64[s]').astype(int)
    df_class3['merge_end'] = (
        df_class3['begin'].astype('datetime64[D]') + _np.timedelta64(27, '[h]')
        - J2000origin).astype('timedelta64[s]').astype(int)
    # merging to 2:55:00
    df_class3['merge_end'] = (
        df_class3['begin'].astype('datetime64[D]') + _np.timedelta64(27, '[h]')
        - _np.timedelta64(5, '[m]') - J2000origin).astype('timedelta64[s]').astype(int)

    merge_table_class3 = df_class3[[
        'merge_begin', 'merge_end', 'path_prev', 'path', 'path_next'
    ]]

    # check if a merged version already exists
    ifexists = _np.zeros((merge_table_class3.shape[0]))
    merged_paths = merge_table_class3['path'] + '.30h'
    for i in range(merged_paths.shape[0]):
        ifexists[i] = _os.path.isfile(merged_paths.values[i])
    ifexists = ifexists.astype(bool)
    merge_table_class3_run = merge_table_class3[~ifexists]

    if (merge_table_class3[~ifexists]).shape[0] == 0:
        print('All merge files present')
    else:
        num_cores = (num_cores if merge_table_class3_run.shape[0] > num_cores
                     else merge_table_class3_run.shape[0])
        print('Number of files to merge:', merge_table_class3_run.shape[0],
              '| Adj. num_cores:', num_cores)
        with _Pool(processes=num_cores) as p:
            if tqdm:
                list(_tqdm.tqdm_notebook(
                    p.imap(_merge, merge_table_class3_run.to_records()),
                    total=merge_table_class3_run.shape[0]))
            else:
                p.map(_merge, merge_table_class3_run.to_records())
def gen_penna_tdp(tmp_path,
                  staDb_path,
                  tqdm,
                  period=13.9585147,  # Penna, N. T. et al. (2015) p.6526
                  num_cores=25,
                  A_East=2,
                  A_North=4,
                  A_Vertical=6):
    '''
    1. Read staDb file (staDb has to have information on all the stations in the dataset).
    2. Extract station names and positions. Create rot for each station.
    3. Glob all tdp files.
    4. Loop through the tdp files list. Read each file. All years and DOYs that are present in the directory!!!
    5. For each file extract time values. Generate synth waves.
    6. Rotate for each station [in staDb?] and create tdp output lines for each station.
       staDb is generated on the fly from the list of stations fetched.
    7. Concatenate outputs and append to the input tdp file.
    '''
    files = _np.asarray(sorted(_glob.glob(tmp_path + '/tropNom/*/*/30h_tropNominalOut_VMF1.tdp')))
    num_cores = num_cores if len(files) > num_cores else len(files)
    ref_xyz_df = get_ref_xyz(staDb_path)
    rot = get_rot(ref_xyz_df)
    # population array [xyz_staDb_data, period, A_East, A_North, A_Vertical, rot]
    aux = _np.empty((files.shape[0], 6), dtype=object)
    aux[:] = [ref_xyz_df, period, A_East, A_North, A_Vertical, rot]
    print('Number of files to be processed:', len(files),
          '\nAdjusted number of cores:', num_cores)
    np_set = _np.column_stack((files, aux))
    '''
    np_set[0]
    ['/mnt/Data/bogdanm/tmp_GipsyX/tropNom/2003/001/30h_tropNominalOut_VMF1.tdp',
       Station             X            Y             Z
     0    BRAE  3.475467e+06 -206213.0000  5.326645e+06
     1    LOFT  3.706041e+06  -55853.0000  5.173496e+06
     2    WEAR  3.686877e+06 -143592.0000  5.185648e+06
     3    CAMB  4.071647e+06 -379677.1000  4.878479e+06
     4    HERT  4.033461e+06   23537.6625  4.924318e+06
     5    LERW  3.183055e+06  -65838.5000  5.508326e+06
     6    NEWL  4.079954e+06 -395930.4000  4.870197e+06
     7    SHEE  3.983074e+06   51683.0000  4.964640e+06,
     13.9585147, 2, 4, 6]
    '''
    with _Pool(processes=num_cores) as p:
        if tqdm:
            list(_tqdm.tqdm_notebook(p.imap(_gen_penna_tdp_file, np_set), total=np_set.shape[0]))
        else:
            p.map(_gen_penna_tdp_file, np_set)
def create(index, name=config.get("cloudmesh.azure.username")): global vmName, vmImage, vmPassword vmImage = config.get("cloudmesh.azure.image") userName = config.get("cloudmesh.azure.username") vmPassword = config.get("cloudmesh.azure.password") vmName = name if vmImage == "": print ("image name not set, use set:image,value)") elif userName == "": print ("image name not set, use set:username,value)") elif vmPassword == "": print ("image name not set, use set:password,value)") else: """Creates a number of vms with the labels prefix-0 to prefix-<number-1>. It uses a threadpool""" pool = _Pool(processes=maxparallel) list = range(0, int(index)) result = pool.map(_boot, list) # _boot(index) print result
def create(index, name=config.get('azure', 'username')):
    global vmName, vmImage, vmPassword
    vmImage = config.get('azure', 'image')
    userName = config.get('azure', 'username')
    vmPassword = config.get('azure', 'password')
    vmName = name
    if vmImage == '':
        print('image name not set, use set:image,value')
    elif userName == '':
        print('username not set, use set:username,value')
    elif vmPassword == '':
        print('password not set, use set:password,value')
    else:
        """Creates a number of vms with the labels prefix-0 to
        prefix-<number-1>. It uses a threadpool"""
        pool = _Pool(processes=maxparallel)
        vm_indices = range(0, int(index))
        result = pool.map(_boot, vm_indices)
        # _boot(index)
        print(result)
def create(index, name=config.get('cloudmesh.azure.username')):
    global vmName, vmImage, vmPassword
    vmImage = config.get('cloudmesh.azure.image')
    userName = config.get('cloudmesh.azure.username')
    vmPassword = config.get('cloudmesh.azure.password')
    vmName = name
    if vmImage == '':
        print('image name not set, use set:image,value')
    elif userName == '':
        print('username not set, use set:username,value')
    elif vmPassword == '':
        print('password not set, use set:password,value')
    else:
        """Creates a number of vms with the labels prefix-0 to
        prefix-<number-1>. It uses a threadpool"""
        pool = _Pool(processes=maxparallel)
        vm_indices = range(0, int(index))
        result = pool.map(_boot, vm_indices)
        # _boot(index)
        print(result)
def jpl2merged_orbclk(begin, end, GNSSproducts_dir, num_cores=None,
                      h24_bool=True, makeShadow_bool=True, tqdm=True,
                      run_dir='/run/user/1017/'):
    '''GipsyX can only merge daily products so effectively we end up having 3 days
    merged (72 hours, centre 24 +- 24) and not 30 hours'''
    begin64 = _np.datetime64(begin).astype('datetime64[D]')
    end64 = _np.datetime64(end).astype('datetime64[D]')
    products_day = _np.arange(begin64, end64)
    products_begin = ((products_day - _np.timedelta64(3, 'h')) - _J2000origin).astype(int)
    products_end = (products_day + _np.timedelta64(27, 'h') - _J2000origin).astype(int)
    # rewriting 1st and last values. These are 27 hour products precisely according to boundaries specified
    products_begin[0] = (products_day[0] - _J2000origin).astype(int)
    products_end[-1] = (products_day[-1] + _np.timedelta64(24, 'h')
                        - _np.timedelta64(5, 'm') - _J2000origin).astype(int)

    year_str = (_pd.Series(products_day).dt.year).astype(str)
    output_merged_dir = _os.path.abspath(GNSSproducts_dir)
    target_path = _os.path.abspath(_os.path.join(output_merged_dir, _os.pardir, _os.pardir,
                                                 'init', _os.path.basename(output_merged_dir)))
    if _os.path.exists(target_path):
        _rmtree(target_path)
    target_dir = target_path + '/' + year_str
    for dir in target_dir.unique():  # creating folder structure before conversion
        _os.makedirs(dir)

    repository = _np.ndarray((products_day.shape), object)
    h24 = _np.ndarray((products_day.shape), bool)
    makeShadow = _np.ndarray((products_day.shape), bool)
    tmp_merge_path = _os.path.abspath(run_dir) + '/tmp_merge/'
    run = tmp_merge_path + _pd.Series(products_day).astype(str)
    # Need to clear run before new execution just in case
    if _os.path.exists(tmp_merge_path):
        _rmtree(tmp_merge_path)
    repository.fill(GNSSproducts_dir)
    h24.fill(h24_bool)
    makeShadow.fill(makeShadow_bool)

    input_sets = _np.column_stack([products_begin, products_end, repository, target_dir,
                                   h24, makeShadow, products_day, run])
    with _Pool(processes=num_cores) as p:
        if tqdm:
            list(_tqdm.tqdm_notebook(p.imap(_gen_orbclk, input_sets), total=input_sets.shape[0]))
        else:
            p.map(_gen_orbclk, input_sets)
    _rmtree(tmp_merge_path)  # cleaning
def rnx2dr(selected_df, num_cores, tqdm, cache_path, staDb_path, cddis=False):
    '''Runs rnxEditGde.py for each file in the class object using multiprocessing'''
    # Checking which files are already in place so as not to overwrite them
    print('staDb_path:', staDb_path)
    if_exists_array = _np.ndarray((selected_df.shape[0]), dtype=bool)
    for i in range(if_exists_array.shape[0]):
        if_exists_array[i] = not _os.path.exists(selected_df['dr_path'][i])
    selected_df = selected_df[if_exists_array]

    selected_df2convert = selected_df[['rnx_path', 'dr_path']].copy()
    selected_df2convert['cache_path'] = cache_path  # populating df with cache path value
    # populating with staDb_path, which is needed as rnx files may lack receiver information
    selected_df2convert['staDb_path'] = staDb_path
    selected_df2convert = selected_df2convert.values

    if selected_df2convert.shape[0] > 0:
        num_cores = (num_cores if selected_df2convert.shape[0] > num_cores
                     else selected_df2convert.shape[0])
        print('Number of files to process:', selected_df2convert.shape[0],
              '| Adj. num_cores:', num_cores, end=' ')
        with _Pool(processes=num_cores) as p:
            if tqdm:
                list(_tqdm.tqdm_notebook(p.imap(_2dr, selected_df2convert),
                                         total=selected_df2convert.shape[0]))
            else:
                p.map(_2dr, selected_df2convert)
    else:
        # In case the length of the unconverted files array is 0, nothing will be converted
        print('RNX files converted.\nNothing to convert. All available rnx files are already converted')
def get_ionex_data(self, in_sets):
    num_cores = self.num_cores if len(in_sets) > self.num_cores else len(in_sets)
    chunksize = int(_np.ceil(len(in_sets) / num_cores))
    # Collecting ionex maps from multiple files in parallel
    with _Pool(num_cores) as p:
        if self.tqdm:
            GIM_data = list(_tqdm.tqdm_notebook(
                p.imap(GIM_data_extraction, in_sets[:, [3, 4]]),
                total=in_sets.shape[0]))
        else:
            # GIM_data_extraction expects only an array with filepaths
            GIM_data = p.map(GIM_data_extraction, in_sets[:, [3, 4]])
    # Need this piece of code to drop the 00:00 values, eliminating duplicates
    for i in range(len(GIM_data) - 1):
        if (GIM_data[i][0].tail(1).dt.hour == 0).iloc[0]:
            GIM_data[i].drop(GIM_data[i][0].tail(1).index[0], inplace=True)
    # Merging all data into two arrays (TEC and RMS)
    data_GIM_final = _pd.DataFrame()
    for element in GIM_data:
        data_GIM_final = _pd.concat((data_GIM_final, element[[1, 2]]))
    # Resulting array with two columns
    return data_GIM_final.values
def gather_metadata(logs_glob_path='/data/station_logs/station_logs_IGS/*/*.log',
                    rnx_glob_path=None,
                    num_threads=1):
    '''Parses log files found with the glob expression'''
    parsed_filenames = find_recent_logs(logs_glob_path=logs_glob_path,
                                        rnx_glob_path=rnx_glob_path).values
    total = parsed_filenames.shape[0]
    if num_threads == 1:
        gather = []
        for file in tqdm(parsed_filenames, miniters=total // 100, total=total):
            gather.append(parse_igs_log(file))
    else:
        with _Pool(processes=num_threads) as pool:
            gather = list(tqdm(pool.imap_unordered(parse_igs_log, parsed_filenames),
                               total=total, miniters=total // 100))

    gather_raw = _np.concatenate(gather)
    rec_ant_mask = gather_raw[:, 0] != 0  # id_loc = 0, rec = 1, ant = 2
    gather_id_loc = gather_raw[~rec_ant_mask][:, 1:]
    gather = gather_raw[rec_ant_mask]

    stacked_rec_ant_dt = _np.concatenate([gather[:, -3], gather[:, -2]], axis=0)
    stacked_rec_ant_dt = igslogdate2datetime64(stacked_rec_ant_dt)
    snx_date = datetime2yydoysec(stacked_rec_ant_dt)
    gather = _np.concatenate([gather, snx_date.reshape(2, gather.shape[0]).T], axis=1)

    stacked_rec_ant_dt_beg_end = stacked_rec_ant_dt.reshape(2, gather.shape[0])
    # also deals with nans as there is no equal sign
    # same goes for station start being empty as it becomes year 2100
    valid_mask_dt = stacked_rec_ant_dt_beg_end[0] < stacked_rec_ant_dt_beg_end[1]
    bad_dt_stations = _np.unique(gather[~valid_mask_dt][:, 1])

    rec_mask = gather[:, 0] == 1
    rec_df = _pd.DataFrame(
        _np.delete(arr=gather[rec_mask], axis=1, obj=[0, 6, 7, 8]),
        columns=['CODE', 'RECEIVER', 'GNSS', 'S/N', 'FW', 'BEGIN_RAW', 'END_RAW',
                 'PATH', 'BEGIN_SNX', 'END_SNX'])
    ant_df = _pd.DataFrame(
        gather[~rec_mask][:, 1:],
        columns=['CODE', 'ANTENNA', 'RADOME', 'S/N', 'EccU', 'EccN', 'EccE',
                 'RADOME2', 'BEGIN_RAW', 'END_RAW', 'PATH', 'BEGIN_SNX', 'END_SNX'])
    # ID LOC
    id_loc_df = _pd.DataFrame(gather_id_loc,
                              columns=['CODE', 'DOMES_N', 'CITY', 'COUNTRY',
                                       'X', 'Y', 'Z', 'LAT', 'LON', 'HEI', 'PATH'])
    id_loc_df.CITY[id_loc_df.CITY == ''] = 'N/A'
    id_loc_df.CITY = id_loc_df.CITY.str.rstrip().str.upper()
    id_loc_df.COUNTRY = translate_series(id_loc_df.COUNTRY.str.rstrip().str.upper(),
                                         translation_country).values
    id_loc_df.DOMES_N[id_loc_df.DOMES_N == ''] = '---------'

    xyz_array = (id_loc_df[['X', 'Y', 'Z']].stack()
                 .str.replace(',', '.')
                 .replace({'': None})
                 .unstack().values.astype(float))
    valid_mask = _np.all(((xyz_array != 0) & ~_np.isnan(xyz_array)), axis=1)
    xyz_norm = (xyz_array[valid_mask] ** 2).sum(axis=1) ** 0.5
    valid_mask[valid_mask] = (xyz_norm > 6000000) & (xyz_norm < 6500000)

    llh = xyz2llh_heik(xyz_array[valid_mask], deg=True)
    llh_snx = llh2snxdms(llh)
    llh2 = id_loc_df[~valid_mask][['LAT', 'LON', 'HEI']]
    llh2_snx = logllh2snxdms(llh2)
    snxdms = _np.empty(valid_mask.shape, dtype=object)
    snxdms[valid_mask] = llh_snx
    # snxdms[valid_mask] = ' 000 00 00.0 00 00 00.0 000.0'
    snxdms[~valid_mask] = llh2_snx
    # snxdms[~valid_mask] = ' 000 00 00.0 00 00 00.0 000.0'  # llh2_snx
    # bad_loc_stations = id_loc_df.CODE[snxdms == ''].values
    id_loc_df['LOC'] = snxdms

    ecc = ant_df[['EccU', 'EccN', 'EccE']].values
    ecc[ecc == ''] = 0
    ant_df[['EccU', 'EccN', 'EccE']] = ecc.astype(float)

    rec_df.RECEIVER = rec_df.RECEIVER.str.rstrip().str.upper()
    ant_df.ANTENNA = ant_df.ANTENNA.str.rstrip().str.upper()
    ant_df.RADOME = ant_df.RADOME.str.rstrip().str.upper()
    ant_df.RADOME2 = ant_df.RADOME2.str.rstrip().str.upper()
    no_rad2_mask = ~ant_df.RADOME.isin(atx_rad_tbl)
    ant_df.RADOME[no_rad2_mask] = ant_df.RADOME2[no_rad2_mask]

    # translation_ant.index.name = None
    antennas = translate_series(ant_df.ANTENNA, translation_ant)
    invalid_ant_mask = ~antennas.index.isin(atx_ant_tbl)
    bad_antenna_stations = ant_df[invalid_ant_mask]['CODE'].unique()

    receivers = translate_series(rec_df.RECEIVER, translation_rec)
    invalid_rec_mask = ~receivers.index.isin(igs_rec_tbl)
    bad_rec_stations = rec_df[invalid_rec_mask]['CODE'].unique()

    radomes = translate_series(ant_df.RADOME, translation_rad)
    invalid_radomes_mask = ~radomes.index.isin(atx_rad_tbl)
    bad_radome_stations = ant_df[invalid_radomes_mask]['CODE'].unique()

    ant_df.ANTENNA = antennas.values
    ant_df.RADOME = radomes.values
    rec_df.RECEIVER = receivers.values

    bad_stations = _np.unique(bad_dt_stations.tolist()
                              + bad_radome_stations.tolist()
                              + bad_antenna_stations.tolist()
                              + bad_rec_stations.tolist())
    rec_df = rec_df[~rec_df.CODE.isin(bad_stations)].copy()
    ant_df = ant_df[~ant_df.CODE.isin(bad_stations)].copy()
    id_loc_df = id_loc_df[~id_loc_df.CODE.isin(bad_stations)].copy()
    return id_loc_df, rec_df, ant_df
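# --- Hypothetical usage (not part of the original source) -------------------
# Assumes IGS station logs are available under the default glob path; the
# path and thread count below are illustrative only.
id_loc_df, rec_df, ant_df = gather_metadata(
    logs_glob_path='/data/station_logs/station_logs_IGS/*/*.log',
    num_threads=8)
print(rec_df.columns.tolist())  # CODE, RECEIVER, GNSS, S/N, FW, ...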
        while 1:
            var = raw_input("Image : ")
            if images.has_key(var):
                print images[var][0]
                return images[var][0]
            else:
                print "Incorrect Image name"
                return var

    def create(self, index, image=None, name=None):
        if image is not None:
            """Creates a number of vms with the labels prefix-0 to
            prefix-<number-1>. It uses a threadpool"""
            pool = _Pool(processes=maxparallel)
            vm_indices = range(0, int(index))
            result = pool.map(_boot, vm_indices)
            print result

    def _boot(self, index, name):
        # cmd = 'azure vm create %(vmname)s %(image)s %(username)s --ssh --location "East US" %(password)s' % vm
        # print cmd
        result = _vm_create("%s" % vm_name(index),
                            "%s" % name,
                            "%(username)s" % self.credentials,
                            "--ssh",
                            "--location", "East US",
def build(
    self,
    build_all: bool = False,
    target_list: _Optional[list] = None,
    number_of_threads: _Optional[int] = None,
):
    """Build targets of this project.

    By default, this function builds all targets in this project as well as
    all their dependencies. This function will configure all targets that
    haven't been configured in a previous call.

    Parameters
    ----------
    build_all : bool
        If set to true, will not only build all targets in this project and
        their dependencies, but also all targets of all sub-projects.
    target_list : list
        If given, will build all targets in this project that are in the
        given list, as well as all their dependencies.
    number_of_threads : int
        If given, will compile targets with the given number of threads.
        Otherwise it will use the default number of CPU cores visible to
        Python.

    """
    # Get targets to build
    targets_to_build = self._get_targets_to_build(build_all, target_list)

    # Sort targets in build order
    build_list = [
        target
        for target in reversed(list(_nx.topological_sort(self._project_tree)))
        if (target in targets_to_build or build_all)
        and not isinstance(self._project_tree.nodes[target]["data"], Project)
    ]

    # Get project sources, if any
    project_build_list = []
    for target_description in build_list:
        project_build_list.append(target_description.parent_project)
    project_build_list = list(dict.fromkeys(project_build_list))
    for project in project_build_list:
        project.get_sources()

    ### Note: the project_tree needs to be updated directly for dependencies
    ### to be used correctly in the `_target_from_description` function
    target_build_list = []
    for list_entry in build_list:
        if isinstance(list_entry, _TargetDescription):
            target = self._target_from_description(
                self._project_tree.nodes[list_entry]["data"])
            if target:
                target_build_list.append(target)
                self._project_tree.nodes[list_entry]["data"] = target
        elif isinstance(list_entry, _Target):
            target_build_list.append(list_entry)
        else:
            error_message = self.parent.log_message(
                f"Found {list_entry} in target list, which cannot be used because"
                " it is not derived from Target or TargetDescription.")
            self._logger.exception(error_message)
            raise RuntimeError(error_message)

    if not target_build_list:
        self._logger.info("No targets to be built")
    else:
        self._logger.info(
            f"Building {', '.join([str(target) for target in target_build_list])}")

    # Compile
    with _Pool(processes=number_of_threads) as process_pool:
        for target in target_build_list:
            target.compile(process_pool, False)

    # Link
    for target in target_build_list:
        target.link()

    # Bundle
    if self._environment.bundle:
        with _Pool(processes=number_of_threads) as process_pool:
            for target in target_build_list:
                target.bundle()

    # Redistributable bundle
    if self._environment.redistributable:
        with _Pool(processes=number_of_threads) as process_pool:
            for target in target_build_list:
                target.redistributable()
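# --- Hypothetical usage (not part of the original source) -------------------
# Assumes `my_project` is an instance of the class that owns `build`; builds
# every target of the project and its sub-projects with four compile processes.
my_project.build(build_all=True, number_of_threads=4)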
def gen_tropnom(tmp_dir, staDb_path, rate, VMF1_dir, num_cores):
    '''
    Generates a tropnominal file for valid stations in the staDb file. Takes the number
    of years from dr_info.npz.
    Had to create an additional for loop as file no 31 gives an error, no matter what
    year it is (tropNom read error of the VMF1 file). A tdp file is created for each
    observation file.
    '''
    num_cores = int(num_cores)
    # Creates a staDb object
    staDb = _StationDataBase.StationDataBase(dataBase=staDb_path)  # creating staDb object
    stns = staDb.getStationList()  # creating array with available station names
    print(len(stns), 'sites found in staDb:', stns)  # verbal output of stations that will be present in tropNom files
    drinfo_file = _dump_read(filename='{}/{}/{}.zstd'.format(tmp_dir, rnx_dr_lbl, drInfo_lbl))
    drinfo_years_list = drinfo_file.begin.dt.year.unique()

    # Creating folder and file structure taking into account leap years.
    # Resulting paths look as follows: year/doy/30h_tropNominal.vmf1
    # Data on the next day is needed to create the current day's tropnominal.
    days_in_year = _np.ndarray((len(drinfo_years_list)), dtype=int)
    current_year = _np.datetime64('today').astype('datetime64[Y]').astype(str).astype(int)
    for i in range(len(drinfo_years_list)):
        # vmf1 data is missing for the current year (if it is not a prediction),
        # so an additional check of the files present is needed
        if int(drinfo_years_list[i]) != current_year:
            days_in_year[i] = int(365 + (1 * _calendar.isleap(drinfo_years_list[i])))
            date = (_np.datetime64(str(drinfo_years_list[i]))
                    + (_np.arange(days_in_year[i]).astype('timedelta64[D]')))
            # Now all works correctly. The bug with wrong timevalues was corrected.
        else:
            current_year_VMF1_dir_ah = _os.path.join(VMF1_dir, str(current_year), 'ah')
            last_ah_file_path = sorted(_glob.glob(current_year_VMF1_dir_ah + '/*'))[-1]  # e.g. ah19315.h18.gz
            last_ah_filename = _os.path.basename(last_ah_file_path)
            last_day_used = int(last_ah_filename[4:7]) - 1  # we do -1 so as to create a 30h tropnominal
            date = _np.datetime64(str(current_year)) + (_np.arange(last_day_used).astype('timedelta64[D]'))
            print('Last VMF1 day in {} is {}. Generating up to {}'.format(
                str(current_year), last_ah_filename[4:7], str(last_day_used)))

        begin = ((date - J2000origin) - _np.timedelta64(3, '[h]')).astype(int)
        end = ((date - J2000origin) + _np.timedelta64(27, '[h]')).astype(int)
        tropNom_out = (tmp_dir + '/tropNom/' + str(drinfo_years_list[i]) + '/'
                       + _pd.Series(date).dt.dayofyear.astype(str).str.zfill(3)
                       + '/30h_tropNominalOut_VMF1.tdp').values
        staDb_nd = _np.ndarray((tropNom_out.shape), dtype=object)
        rate_nd = _np.ndarray((tropNom_out.shape), dtype=object)
        VMF1_dir_nd = _np.ndarray((tropNom_out.shape), dtype=object)
        stns_nd = _np.ndarray((tropNom_out.shape), dtype=object)
        staDb_nd.fill(staDb)
        rate_nd.fill(rate)
        VMF1_dir_nd.fill(VMF1_dir)
        stns_nd.fill(stns)

        tropnom_param = _np.column_stack((begin, end, tropNom_out, staDb_nd,
                                          rate_nd, VMF1_dir_nd, stns_nd))
        num_cores = num_cores if len(tropnom_param) > num_cores else len(tropnom_param)
        step_size = int(_np.ceil(len(tropnom_param) / num_cores))
        print(drinfo_years_list[i], 'year tropnominals generation...', end=' ')
        print('Number of files to process:', len(tropnom_param), '| Adj. num_cores:', num_cores)
        # A tqdm implementation would produce lots of bars because of the pools inside the for loop
        for j in range(step_size):
            try:
                pool = _Pool(num_cores)
                pool.map(_gen_VMF1_tropNom,
                         tropnom_param[_np.arange(j, len(tropnom_param), step_size)])
            finally:
                pool.close()
                pool.join()
        print('| Done!')
def get_drInfo(tmp_dir, num_cores, tqdm, selected_rnx):
    '''Analysis is done over all stations in the project's tmp_dir. The problem with
    running the analysis on all converted files is the 30-hour files that are present
    in the directory, which make the original files difficult to extract. The naming
    convention for 30h files was changed; need to change the merge naming.'''
    tmp_dir = _os.path.abspath(tmp_dir)
    num_cores = int(num_cores)  # safety precaution in case a str value is specified
    rnx_dir = _os.path.join(tmp_dir, rnx_dr_lbl)
    drinfo_dir = _os.path.join(rnx_dir, drInfo_lbl)
    if not _os.path.exists(drinfo_dir):
        _os.makedirs(drinfo_dir)
    selected_rnx['good'] = _dr_size(selected_rnx['dr_path']) > 20
    # New approach to file saving: save SSSSYYYY.zstd files for each year in each station. More modular approach.
    stations = selected_rnx[selected_rnx['good']]['station_name'].unique().sort_values()
    print('stations selected: {}'.format(stations.get_values()))
    years = selected_rnx[selected_rnx['good']]['year'].unique()
    years.sort()
    print('years selected : {}'.format(years))
    for station in stations:
        for year in years:
            filename = '{drinfo_dir}/{yyyy}/{station}{yy}.zstd'.format(
                drinfo_dir=drinfo_dir,
                yyyy=year.astype(str),
                station=station.lower(),
                yy=year.astype(str)[2:])
            if not _os.path.exists(filename):
                dr_station_year = selected_rnx[(selected_rnx['station_name'] == station)
                                               & (selected_rnx['year'] == year)]
                dr_good_station_year = dr_station_year['dr_path'][dr_station_year['good']]
                if dr_good_station_year.shape[0] > 0:
                    print('{} good files found for {}{} out of {}. Running get_drInfo...'.format(
                        dr_good_station_year.shape[0], station, year, dr_station_year.shape[0]))
                    num_cores = (num_cores if dr_good_station_year.shape[0] > num_cores
                                 else dr_good_station_year.shape[0])
                    with _Pool(processes=num_cores) as p:
                        if tqdm:
                            drinfo_df = _pd.concat(list(_tqdm.tqdm_notebook(
                                p.imap(_drInfo2df, dr_good_station_year),
                                total=dr_good_station_year.shape[0],
                                desc='{}{}'.format(station.lower(), year.astype(str)[2:]))),
                                axis=0, ignore_index=True)
                        else:
                            print('Running get_drInfo for {station}{yy}.zstd'.format(
                                station=station.lower(), yy=year.astype(str)[2:]))
                            drinfo_df = _pd.concat(p.map(_drInfo2df, dr_good_station_year),
                                                   axis=0, ignore_index=True)
                    drinfo_df['station_name'] = drinfo_df['station_name'].astype('category')
                    drinfo_df['length'] = (drinfo_df['end'] - drinfo_df['begin']).astype(
                        'timedelta64[h]').astype(int)
                    # Saving extracted data for further processing.
                    # Gathering should be done separately, otherwise conflicts and corrupted files occur.
                    _dump_write(data=drinfo_df, filename=filename, cname='zstd', num_cores=num_cores)
                else:
                    print('{} good files found for {}{} out of {}. Skipping.'.format(
                        dr_good_station_year.shape[0], station, year, dr_station_year.shape[0]))
            else:
                print('{} exists'.format(filename))
def uncompress_mp(filelist, num_cores=10):
    with _Pool(processes=num_cores) as p:
        p.map(uncompress, filelist)
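# --- Hypothetical usage (not part of the original source) -------------------
# Assumes `uncompress` accepts a single compressed file path; the glob below
# is illustrative only.
compressed_files = _glob.glob('/data/rinex/2020/*/*.gz')
uncompress_mp(compressed_files, num_cores=4)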
def sghmc_chains(grad_log_den_data, grad_log_den_prior, data, V_hat, eps,
                 theta_0, C, heatup, epoches, batch_size, chain=1, Minv=None):
    '''
    Implementation of Stochastic Gradient Hamiltonian Monte Carlo.
    (See details in Chen et al., 2014.)
    This is a multiprocess version of sghmc (only works on Linux). It runs
    multiple (number = chain) simulations simultaneously and returns a list
    of simulations.

    Dimensions in the sampling procedure:
        p: dimension of parameters (theta)
        n: number of observed data points
        m: dimension of data

    INPUT:
    grad_log_den_data: function with parameters (data, theta) to compute
        $\\nabla log(p(data|theta))$ (gradient with respect to theta) of a set of data.
    grad_log_den_prior: function with parameter (theta) to compute $\\nabla log(p(theta))$.
    data: np.array with shape (n, m) representing observed data.
    V_hat: np.array with shape (p, p); a matrix of estimated Fisher information.
    eps: float or double; learning rate.
    theta_0: np.array with shape (p,); initial point of sampling.
    C: np.array with shape (p, p); a matrix representing friction, see paper for
        details. C - 0.5*eps*V_hat must be positive definite.
    heatup: int; iterations to dump before storing sampling points.
    epoches: int; iterations to run. Must be greater than heatup.
    batch_size: int; size of a minibatch in an iteration, hundreds recommended.
    chain: int; number of chains to run. Each chain is a simulation.
    Minv: np.array with shape (p, p); if default (None), the identity is used.
        (See paper for details.)

    OUT:
    sample: a list (number = chain) of np.array with shape (epoches - heatup, p)
        of sampled posterior thetas.
    '''
    n, m = data.shape
    p = theta_0.shape[0]
    if V_hat.shape != (p, p):
        _sys.exit('V_hat dimension does not match with theta')
    if Minv is not None:
        if Minv.shape != (p, p):
            _sys.exit('Minv dimension does not match with theta')
    if C.shape != (p, p):
        _sys.exit('C dimension does not match with theta')
    if n % batch_size != 0:
        _sys.exit('number of data should be divisible by batch_size')

    sqrt_noise = _la.sqrtm(2 * (C - 0.5 * eps * V_hat) * eps)
    batches = int(_np.ceil(n / batch_size))
    if Minv is None:
        sqrtM = None
        prer = eps
        fric = eps * C
    else:
        sqrtM = _la.sqrtm(_la.inv(Minv))
        prer = eps * Minv
        fric = eps * C @ Minv

    sp = _pt(_single_chain,
             theta_0=theta_0, epoches=epoches, heatup=heatup, p=p, n=n,
             Minv=Minv, sqrtM=sqrtM, data=data, batches=batches, prer=prer,
             gradU=_gradU, grad_log_den_data=grad_log_den_data,
             grad_log_den_prior=grad_log_den_prior, eps=eps, fric=fric,
             sqrt_noise=sqrt_noise)
    with _Pool(processes=chain) as pool:
        seedss = list(_np.random.randint(0, 10000, chain))
        res = pool.map(sp, seedss)
    return res
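# --- Hedged usage sketch (not part of the original source) ------------------
# A one-dimensional Gaussian mean model, y_i ~ N(theta, 1) with a N(0, 10)
# prior; the gradient functions and hyperparameters are illustrative only.
import numpy as np

def grad_log_den_data(data, theta):
    # gradient of sum_i log N(y_i | theta, 1) with respect to theta
    return np.array([np.sum(data[:, 0] - theta[0])])

def grad_log_den_prior(theta):
    # gradient of log N(theta | 0, sigma^2 = 10) with respect to theta
    return -theta / 10.0

y = np.random.normal(loc=1.0, scale=1.0, size=(1000, 1))
chains = sghmc_chains(grad_log_den_data, grad_log_den_prior, y,
                      V_hat=np.eye(1), eps=1e-3, theta_0=np.zeros(1),
                      C=np.eye(1), heatup=500, epoches=2000,
                      batch_size=100, chain=2)
# `chains` is a list of two (1500, 1) arrays of posterior theta samples.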
_logpath = _EXPDIR + "/" + _LOG_FILE

if True:  # Worker
    _t_total = time.time()

    def proc(run):  # Called by each worker
        _r_time = time.time()
        if conf.delay:
            if run[1] <= conf.workers:
                time.sleep(conf.delay * (run[1] - 1))
        os.system('python3 ' + conf.file + ' ' + run[0]
                  + (' > /dev/null' if not conf.verbose else ''))
        _elapsed = time.time() - _r_time
        s = (str(dt.datetime.now(tz=pytz.timezone(_TIMEZONE)))[:19]
             + ' Completed: ' + str(run[1]) + " / " + str(n)
             + ' in ' + human_time(_elapsed)
             + ' ETA: ' + human_time(_elapsed * ((n - run[1]) / conf.workers)))
        if conf.save:
            with open(_logpath, "a") as logfile:
                logfile.write(s + '\n')
        print(s)

if __name__ == '__main__':  # Main program
    pool = _Pool(processes=conf.workers)
    pool.imap(proc, runs)
    pool.close()
    pool.join()
    s = (str(dt.datetime.now(tz=pytz.timezone(_TIMEZONE)))[:19]
         + ' All finished! This took: ' + human_time(time.time() - _t_total))
    if conf.save:
        with open(_logpath, "a") as logfile:
            logfile.write(s + '\n')
    print(s)