def multi_Non_Tweep_friends(self, handle):
    """Collect the people found in the tweet links that mention ``handle``.

    Pages through ``self.get_tweets`` until it reports no further position,
    keeps the links containing ``handle`` and maps ``self.get_people`` over
    them with a 10-worker pool.

    Args:
        handle: Value passed to ``self.get_tweets`` and used to filter the
            collected links (presumably a screen name -- confirm with caller).

    Returns:
        The flattened ``self.get_people`` results, passed through
        ``self.duplicates``, with entries equal to ``handle`` removed.
    """
    min_position, links = self.get_tweets(handle)
    print("Scraping last 100 days of activity")
    # Keep paging until get_tweets stops returning a continuation position.
    while True:
        min_position1, links1 = self.get_tweets(handle, min_position)
        links = links + links1
        if min_position1 is None:
            break
        min_position = min_position1

    link = [x for x in links if handle in x]
    link = self.duplicates(link)

    # Only one pool is created here; a pool built before the ``with`` and then
    # shadowed by it would never be shut down.
    with Pool(10) as p:  # Pool tells how many at a time
        records = list(tqdm(p.imap(self.get_people, link), total=len(link)))
    # Explicit shutdown kept from the original, in the original order.
    p.terminate()
    p.join()
    p.close()

    people_list = [item for sublist in records for item in sublist]
    people_list = self.duplicates(people_list)
    people_list = [x for x in people_list if x != handle]
    return people_list
def certScanner(self):
    """Scan every IP class of the configured region for keyword hits.

    For each entry of ``self.shuffleList()`` that contains ``self.region``,
    the part before ``"@"`` is expanded with ``self.ipExtract`` and every
    resulting target is checked with ``self.certChecker`` in a 512-node pool.
    Non-``None`` results are collected in ``self.resList`` and reported via
    ``self.printRes``; ``self.resList`` and ``self.ipExtractResult`` are reset
    after each class.
    """
    p = Pool(nodes=512)
    cprint("[+] Keywords : " + " ".join(str(x) for x in self.keywordList), 'green')
    self.allipAddrList = [x for x in self.shuffleList() if self.region in x]

    for self.tryipClass in self.allipAddrList:
        self.ipExtractResult = self.ipExtract(self.tryipClass.split("@")[0])
        _max = len(self.ipExtractResult)
        cprint("[+] Scanning IP Addr Class : " + self.tryipClass + "\t-- Number of scan target is :" + str(len(self.ipExtractResult)), 'green')

        # One progress bar only: wrapping the imap iterator in a second tqdm
        # draws a duplicate bar that has no total.
        with tqdm(total=_max) as pbar:
            pbar.set_description("[+] Progressing : %s " % self.tryipClass)
            for domain in p.imap(self.certChecker, self.ipExtractResult):
                pbar.update()
                if domain is not None:
                    self.resList.append(domain)

        # Stop the workers used for this class, then bring the pool back up so
        # the next loop iteration can use it again.
        p.terminate()
        p.restart()

        if self.resList:
            self.printRes()
        else:
            cprint("[!] No kewords found on this IP class \n", 'red')
        time.sleep(1)
        self.ipExtractResult = []
        self.resList = []
def extract_hits(bins_to_contig_lists, outdir, contig_file, threads):
    """Write one FASTA file per bin by running ``pullseq`` in parallel.

    Args:
        bins_to_contig_lists: Mapping of bin name to the contig names that
            belong to that bin.
        outdir: Directory that receives ``<bin>.fasta`` files; a temporary
            ``pullseq_ids_tmp`` sub-directory is created in it.
        contig_file: Path handed to ``pullseq -i``.
        threads: Number of pool workers.

    Returns:
        The list returned by the pool's ``map`` (one entry per bin; the worker
        returns nothing, so the entries are ``None``).
    """
    import shutil  # function-scope import: only needed for the cleanup below

    pullseq_tmp = os.path.join(outdir, 'pullseq_ids_tmp')
    os.makedirs(pullseq_tmp, exist_ok=True)

    def pullseq_by_bin(bin_name, contig_list, contig_file):
        # Generates a file with the names of all the contigs to pull out,
        # then provides that to pullseq, which writes <bin_name>.fasta.
        ids_path = os.path.join(pullseq_tmp, bin_name + '.txt')
        with open(ids_path, 'w') as outfile:
            for element in contig_list:
                outfile.write(element + '\n')
        # NOTE(review): paths are interpolated into a shell command unquoted;
        # names containing spaces or shell metacharacters will break it.
        os.system('pullseq -i ' + contig_file + ' -n ' + ids_path + ' > ' + os.path.join(outdir, bin_name + '.fasta'))

    p = Pool(threads)
    try:
        return p.map(lambda x: pullseq_by_bin(x, bins_to_contig_lists[x], contig_file), bins_to_contig_lists)
    finally:
        # Runs on success and on failure; an early return ahead of the cleanup
        # would otherwise leave the pool and the temp directory behind.
        p.terminate()
        shutil.rmtree(pullseq_tmp, ignore_errors=True)
class ConsensusMHSampler(MHSampler):
    """Metropolis-Hastings sampler that runs one chain per shard and merges them.

    Shard ``i`` evaluates acceptance against ``self.log_distribution_fn[i]``.
    The chains run in a pool with one node per shard and are combined as a
    weighted average, each chain weighted by the inverse of its sample
    variance norm.
    """

    def __init__(self, log_f, log_g, g_sample, x0, iterations, shards=1):
        """Set up the base sampler, the per-shard log functions and the pool.

        ``shards`` must equal ``len(self.log_distribution_fn)`` as set by the
        base class; the remaining arguments are forwarded unchanged.
        """
        super(ConsensusMHSampler, self).__init__(log_f, log_g, g_sample, x0, iterations)
        self.shards = shards
        assert len(self.log_distribution_fn) == self.shards
        # Index -> log function; kept as a dict for pickling purposes.
        self.log_fn_dict = {i: self.log_distribution_fn[i] for i in range(self.shards)}
        self.pool = Pool(nodes=self.shards)

    def sample(self):
        """Run every shard's chain and store the merged states in ``saved_states``."""
        try:
            map_results = self.pool.map(self.map_sample, range(self.shards))
        finally:
            # Reset the pool even when a worker raises, so a later call to
            # sample() starts from a usable pool.
            self.pool.close()
            self.pool.join()
            self.pool.terminate()
            self.pool.restart()
        self.saved_states = self.reduce_sample(map_results)

    def map_sample(self, index):
        """Run the chain for shard ``index``.

        Returns:
            Tuple ``(states, weight)`` where ``states`` is an array holding the
            start state followed by one state per iteration and ``weight`` is
            ``1 / (1e-8 + get_sample_variance(states))``.
        """
        # NOTE(review): every shard seeds NumPy with the same value, so all
        # shards draw the same random stream -- confirm this is intended.
        np.random.seed(1)
        cur_state = self.start_state
        sample_results = [cur_state]
        prob, count = 0, 0
        for i in range(self.iterations):
            if i % 5000 == 0:
                print("iteration {}".format(i))
            candidate_state = self.get_transition_sample(cur_state)
            acceptance = self.calculate_acceptance_ratio(candidate_state, self.log_fn_dict[index])
            prob += acceptance
            count += 1
            new_state = self.transition_step(cur_state, candidate_state, acceptance)
            sample_results.append(new_state)
            cur_state = new_state
        sample_results = np.array(sample_results)
        # With zero iterations there is nothing to average; report NaN instead
        # of dividing by zero.
        avg_acceptance = prob / count if count else float("nan")
        print("INDEX {}: Avg acceptance prob is {}".format(index, avg_acceptance))
        return (sample_results, 1.0 / (1e-8 + self.get_sample_variance(sample_results)))

    def get_sample_variance(self, data):
        """Return the norm of the per-column (axis 0) variance of ``data``."""
        return np.linalg.norm(np.var(np.array(data), axis=0))

    def reduce_sample(self, results):
        """Return the weighted average of the chains.

        ``results`` is a list of ``(sample_array, weight)`` tuples.
        """
        sample_results = 0
        total_weight = 0
        for sample, weight in results:
            sample_results += weight * sample
            total_weight += weight
        return sample_results / total_weight
def parallelize_dataframe(df: pd.DataFrame, func, n_cores=4) -> pd.DataFrame:
    """Apply ``func`` to ``n_cores`` chunks of ``df`` in parallel and re-join them.

    Args:
        df: Frame to process; split with ``np.array_split``.
        func: Callable applied to each chunk; its results are concatenated
            with ``pd.concat``.
        n_cores: Number of chunks and of pool workers.

    Returns:
        The concatenation of the per-chunk results, in chunk order.
    """
    df_split = np.array_split(df, n_cores)
    pool = Pool(n_cores)
    try:
        return pd.concat(pool.map(func, df_split))
    finally:
        # Have to include this to prevent leakage and allow multiple parallel
        # function calls; in ``finally`` so it also runs when ``func`` raises.
        pool.close()
        pool.join()
        pool.terminate()
        pool.restart()
def _multiprocess2D(func, args_array, ncores=4, display=True): ''' Multipurpose parallel processing Takes a function and an array of arguments, evaluates the function with the given arguments for each point, processing in parallel using ncores number of parallel processes. WARNING: needs to be protected by a if __name__ == "__main__" block or else multiprocessing.pool will have problems. Args: func : The function to evaluate, can only accept one argument but it can be a list or tuple args_array is the array of arguments to the input function $func. ncores : The number of nodes to pass to multiprocessing.Pool display : Will display progress if true. Returns: The results of the calculation as a numpy ndarray. ''' pool = Pool(nodes=ncores) rows = len(args_array) cols = len(args_array[0]) output = np.zeros((rows, cols)) if rows > 10: disp_rows = np.arange(rows / 10, rows, rows / 10) else: disp_rows = np.arange(1, rows, 1) if display: print("Parallel Processing Started with " + str(ncores) + " subprocesses") t0 = timer() for i in range(rows): worker_args = [] for j in range(cols): worker_args.append(args_array[i][j]) try: out = pool.map(func, worker_args) for j in range(cols): output[i, j] = out[j] if display and i in disp_rows: print(str(round(100 * i / float(rows))) + "% Complete") except Exception as e: print("Exception in _multiprocessing2D: Cannot Process") print("_multiprocessing2D: Exiting Process Early") pool.terminate() raise e tf = timer() if display: print(" ") dt = tf - t0 print("Computations Completed in: " + str(datetime.timedelta(seconds=dt))) return output
def parallelmap(func, data, nodes=None):
    """Map ``func`` over ``data`` using a ``ProcessingPool``.

    Args:
        func: Callable applied to each element of ``data``.
        data: Iterable of inputs.
        nodes: Number of pool nodes. When falsy, the CPU count minus two is
            used, but never fewer than one.

    Returns:
        The list returned by ``pool.map``, or ``None`` if the map was
        interrupted with Ctrl-C (the pool is terminated and joined first).
    """
    if not nodes:
        # Leave two cores free, but never ask for zero or negative workers
        # on machines with one or two CPUs.
        nodes = max(1, multiprocessing.cpu_count() - 2)
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, data)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        return None
def parallelmap(func, lst, nodes=None):
    """Map ``func`` over ``lst`` using a pathos ``ProcessingPool``.

    Uses multiprocessing (as opposed to MPI).

    Args:
        func: Callable applied to each element of ``lst``.
        lst: Iterable of inputs.
        nodes: Number of pool nodes. When falsy, the CPU count minus two is
            used, but never fewer than one.

    Returns:
        The list returned by ``pool.map``, or ``None`` if the map was
        interrupted with Ctrl-C (the pool is terminated and joined first).
    """
    from pathos.multiprocessing import ProcessingPool
    from pathos import multiprocessing

    if not nodes:
        # Leave two cores free, but never ask for zero or negative workers
        # on machines with one or two CPUs.
        nodes = max(1, multiprocessing.cpu_count() - 2)
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, lst)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        return None
def parallelmap(func, lst, nodes=None):
    """Map ``func`` over ``lst`` using a pathos ``ProcessingPool``.

    Uses multiprocessing (as opposed to MPI).

    Args:
        func: Callable applied to each element of ``lst``.
        lst: Iterable of inputs.
        nodes: Number of pool nodes. When falsy, the CPU count minus two is
            used, but never fewer than one.

    Returns:
        The list returned by ``pool.map``, or ``None`` if the map was
        interrupted with Ctrl-C (the pool is terminated and joined first).
    """
    from pathos.multiprocessing import ProcessingPool
    from pathos import multiprocessing

    if not nodes:
        # Leave two cores free, but never ask for zero or negative workers
        # on machines with one or two CPUs.
        nodes = max(1, multiprocessing.cpu_count() - 2)
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, lst)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        return None
def multi_get_followers_location(self, followers_ids, amount=-1, workers=10):
    """Count the locations returned by ``self.get_loc`` for a set of followers.

    Args:
        followers_ids: Sequence of follower identifiers (the original comment
            calls them screen names).
        amount: Number of followers to sample at random; ``-1`` uses all of
            them. Values larger than the population are capped to its size.
        workers: Number of pool workers.

    Returns:
        Dict mapping each value returned by ``self.get_loc`` to the number of
        times it occurred.
    """
    if amount != -1:
        # random.sample raises when asked for more items than exist.
        followers_ids = random.sample(followers_ids, min(amount, len(followers_ids)))

    p = Pool(workers)  # Pool tells how many at a time
    try:
        records = p.map(self.get_loc, followers_ids)
    finally:
        # Shut the workers down even if get_loc raised in one of them.
        p.terminate()
        p.join()
    print(records)

    locations = {}
    for location in records:
        locations[location] = locations.get(location, 0) + 1
    return locations
def wrapper(*args, **kwargs):
    """Run the decorated ``fn`` over ``data`` in batches on a worker pool.

    ``fn``, ``workers``, ``batch_size``, ``shuffle``, ``after_hook`` and
    ``reduce_seqs`` come from the enclosing scope. The first positional
    argument is treated as the bound object when its class has an attribute
    named like ``fn``; the next positional argument is the data, and the
    remaining positional and keyword arguments are forwarded to every call.

    Returns:
        ``reduce_seqs`` applied to the per-batch results, passed through
        ``after_hook`` when one is configured.
    """
    obj, data, _args = tuple(), tuple(), tuple()
    if hasattr(args[0].__class__, fn.__name__):
        # Called as a method: keep the instance so it can be re-passed.
        obj, data, *_args = args
        obj = (obj, )
    else:
        data, *_args = args

    # Always work on a private list so that shuffling below never reorders
    # the caller's own sequence.
    data = list(data)
    total_size = len(data)
    _batch_size = total_size // workers + 1 if batch_size is None else batch_size
    print(
        f"@Parallel[workers={workers}, data_size={total_size}, batch_size={_batch_size}]: parallel for {fn.__qualname__}."
    )
    if shuffle:
        print(
            f"@Parallel[workers={workers}, data_size={total_size}, batch_size={_batch_size}]: shuffle data for {fn.__qualname__}."
        )
        random.shuffle(data)

    pool = Pool(workers)
    # Reset the pool before use, as the original code does.
    pool.terminate()
    pool.restart()

    proc = []
    for beg in range(0, total_size, _batch_size):
        batch = data[beg:beg + _batch_size]
        p = pool.apipe(fn, *obj, batch, *_args, **kwargs)
        proc.append(p)
    pool.close()
    pool.join()

    result = reduce_seqs([p.get() for p in proc])
    if after_hook is not None:
        result = after_hook(result)
    return result
def parallel_eval(envs, eval_func, process_n=FLAGS.n_actors):
    """Evaluate ``envs`` with ``eval_func`` across ``process_n`` processes.

    Args:
        envs: Sliceable sequence of environments.
        eval_func: Callable that receives one slice of ``envs`` and returns a
            value supporting ``+`` (the per-slice results are added together).
        process_n: Number of slices and of pool workers.

    Returns:
        The per-slice results combined with ``+`` in slice order.
    """
    # Split envs into process_n contiguous slices covering *every* element:
    # the first ``extra`` slices get one additional item, so a length that is
    # not a multiple of process_n no longer loses its tail.
    base, extra = divmod(len(envs), process_n)
    envs_tasks = []
    start = 0
    for i in range(process_n):
        stop = start + base + (1 if i < extra else 0)
        envs_tasks.append(envs[start:stop])
        start = stop

    # distributed evaluation and pick the highest scored examples within budget
    print('Started distributed evaluation with %d processes...' % process_n)
    # Size the pool by the requested process count, matching the message and
    # the number of slices.
    evaluation_pool = Pool(process_n)
    all_example_eval_results = evaluation_pool.map(eval_func, envs_tasks)
    evaluation_pool.close()
    evaluation_pool.terminate()

    all_example_eval_results = reduce(lambda x, y: x + y, all_example_eval_results)
    print('Finished distributed evaluation.')
    return all_example_eval_results
def transform_saveVoxelFiles(self, cates="", source_filename="model_normalized.obj",
                             dest_filename="model_normalized.mat", dim=64, multiprocess=4,
                             dest_samedir=True, dest_dir=""):
    """Convert every matching mesh model to a voxel model.

    Each directory from ``self.get_flattenAbsModelDir(cates)`` that contains
    ``source_filename`` (which must end in ".obj") is handed to
    ``self.transform_saveVoxelFile``. With ``multiprocess`` greater than 1 the
    work runs in a pool of that many workers; otherwise it runs serially over
    ``self.random_permutation`` of the paths, logging progress.

    When ``dest_samedir`` is False, ``dest_dir`` is used (defaulting to
    "./voxelModels" with a warning) and created if it does not exist.
    """
    if not dest_samedir:
        if dest_dir == "":
            self.warn(
                "Destination directory not given, use default dest_dir which will under current dir"
            )
            dest_dir = "./voxelModels"
        dest_dir = os.path.abspath(dest_dir)
        if not os.path.isdir(dest_dir):
            os.mkdir(dest_dir)

    # Only mesh models are accepted: the source file must exist and be an .obj.
    model_paths = []
    for model_dir in self.get_flattenAbsModelDir(cates):
        candidate = os.path.join(model_dir, source_filename)
        if os.path.isfile(candidate) and source_filename.endswith(".obj"):
            model_paths.append(candidate)
    self.info("Done model path building")

    if not multiprocess > 1:
        # Single process: walk the models in shuffled order and log progress.
        total = len(model_paths)
        for done, model_path in enumerate(self.random_permutation(model_paths), start=1):
            self.transform_saveVoxelFile(model_path, dim, dest_samedir,
                                         dest_filename, dest_dir)
            self.info("Process: {0}/{1}".format(done, total))
        return

    # Multi-process conversion; requires the pool package to be available.
    worker_pool = Pool(multiprocess)
    worker_pool.map(
        lambda model_path: self.transform_saveVoxelFile(
            model_path, dim, dest_samedir, dest_filename, dest_dir),
        model_paths)
    worker_pool.close()
    worker_pool.join()
    worker_pool.terminate()
def annotate_example_decode(self, envs, eval_func, process_n=5):
    """Annotate ``envs`` with ``eval_func`` across ``process_n`` processes.

    Args:
        envs: Sliceable sequence of environments.
        eval_func: Callable that receives one slice of ``envs`` and returns a
            dict.
        process_n: Number of slices and of pool workers.

    Returns:
        One dict holding the union of all per-slice result dicts (later
        slices overwrite earlier ones on key collisions).
    """
    # Split envs into process_n contiguous slices covering *every* element:
    # the first ``extra`` slices get one additional item, so a length that is
    # not a multiple of process_n no longer loses its tail.
    base, extra = divmod(len(envs), process_n)
    envs_tasks = []
    start = 0
    for i in range(process_n):
        stop = start + base + (1 if i < extra else 0)
        envs_tasks.append(envs[start:stop])
        start = stop

    # distributed evaluation and pick the highest scored examples within budget
    print('Started distributed sketch annotation with %d processes...' % process_n)
    evaluation_pool = Pool(process_n)
    all_example_eval_results = evaluation_pool.map(eval_func, envs_tasks)
    evaluation_pool.close()
    evaluation_pool.terminate()
    print('Finished distributed annotation.')

    # combine the results
    all_result_dict = dict()
    for result in all_example_eval_results:
        all_result_dict.update(result)
    return all_result_dict
def parallel_map(func, array, n_workers):
    """Map ``func`` over ``array`` with a pool of ``n_workers`` workers.

    A KeyboardInterrupt raised inside a worker call is converted to a
    RuntimeError. If the map itself fails, whether from Ctrl-C or any other
    exception, a message is printed, the pool is terminated and the same
    exception is raised to the caller.
    """

    def _guarded_call(item):
        # Convert a worker-side Ctrl-C into a RuntimeError.
        try:
            return func(item)
        except KeyboardInterrupt:
            raise RuntimeError("Keyboard interrupt")

    pool = Pool(n_workers)

    def _abort(message):
        # Report why the map stopped and shut the workers down.
        print(message)
        pool.terminate()

    # pylint: disable=W0703,E0702
    try:
        return pool.map(_guarded_call, array)
    except KeyboardInterrupt as interrupt:
        _abort('got ^C while pool mapping, terminating the pool')
        raise interrupt
    except Exception as failure:
        _abort('got exception: %r:, terminating the pool' % (failure, ))
        raise failure
# Export every cluster of at least 500 kb as an id file and a FASTA file.
sufficient_size = cluster_length_df[cluster_length_df.length >= 500000]

fastas_dir = os.path.join(vambdir, 'fastas')
idfiles_dir = os.path.join(vambdir, 'idfiles')
os.makedirs(fastas_dir, exist_ok=True)
os.makedirs(idfiles_dir, exist_ok=True)

good_clusters = sufficient_size.cluster.tolist()
# Work on a copy so the column assignment below does not write into a
# filtered view of clusters_df.
good_clusters_df = clusters_df[clusters_df.cluster_id.isin(good_clusters)].copy()
good_clusters_df['seqid'] = good_clusters_df.seqid.apply(lambda x: x.split('_read_length')[0])

for cluster in good_clusters:
    # The numeric id is not used afterwards; the conversion is kept because it
    # stops the script on a cluster name that has no integer id.
    try:
        cluster_id = int(cluster.split('_')[-1])
    except (AttributeError, ValueError):
        cluster_id = int(cluster)
    this_cluster_df = good_clusters_df[good_clusters_df.cluster_id == cluster].copy()
    with open(os.path.join(idfiles_dir, 'vamb_bin_' + str(cluster) + '.seqids.txt'), 'w') as outfile:
        for element in this_cluster_df.seqid.unique().tolist():
            outfile.write(element + '\n')

idfiles = [os.path.join(idfiles_dir, name) for name in os.listdir(idfiles_dir)]
for idfile in idfiles:
    bin_name = os.path.basename(idfile).split('.seqids')[0]
    # NOTE(review): paths are interpolated into a shell command unquoted.
    os.system('cat ' + idfile + ' | pullseq -i ' + scaffolds + ' -N > ' + fastas_dir + '/' + bin_name + '.fna')

p.terminate()
# NOTE(review): POSIX exit statuses are 8-bit, so 420 is reported as 164;
# this looks like a deliberate hard stop -- confirm it is still wanted.
sys.exit(420)
def parmap(f, X, nprocs=multiprocessing.cpu_count(), force_parallel=False, chunk_size=1):
    """Parallel replacement for ``map(f, X)`` (Python 2 code).

    Returns ``[]`` for empty input and falls back to the builtin ``map`` when
    only one process is needed, when ``cn.serial_parmap`` is set and
    ``force_parallel`` is False, or when the pool refuses to start because the
    current process is daemonic. Otherwise every item is run through
    ``_spawn_fun`` in a pool; the experiment parameters, experiment purpose and
    function caches reported by the workers are merged back into this process.

    Reads and writes the module globals ``proc_count`` and ``i``.
    """
    from ResearchNLP import Constants as cn
    from ResearchNLP.util_files import function_cache
    if len(X) == 0:
        return []  # like map
    # nprocs = min(nprocs, cn.max_procs)
    if nprocs != multiprocessing.cpu_count() and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there is enough procs for a batch size of 1
    nprocs = max(1, min(nprocs, len(X) / chunk_size))  # at least 1
    if len(X) < nprocs:
        if cn.verbose and nprocs != multiprocessing.cpu_count():
            print "parmap too much procs"
        nprocs = len(X)  # too much procs
    if nprocs == 1 or (cn.serial_parmap and not force_parallel ):  # we want it serial (maybe for profiling)
        return map(f, X)

    def _spawn_fun(input, func):
        # Worker entry point: seeds the RNGs, runs ``func(input)`` and returns
        # the result together with the worker's cache and experiment state.
        import random, numpy
        from ResearchNLP import Constants as cn2
        from ResearchNLP.util_files import function_cache as function_cache2
        # NOTE(review): ``i`` is the module-level counter declared global
        # further down, so every item of one parmap call gets the same seed.
        random.seed(1554 + i)
        numpy.random.seed(42 + i)  # set random seeds
        try:
            res = func(input)
            res_dict = dict()
            res_dict["res"] = res
            res_dict["functions_dict"] = function_cache2.caches_dicts
            res_dict["experiment_purpose"] = cn2.experiment_purpose
            res_dict["curr_params_list"] = cn2.curr_experiment_params_list
            return res_dict
        except:
            import traceback
            traceback.print_exc()
            raise  # re-raise exception

    # if chunk_size == 1:
    #     chunk_size = math.ceil(float(len(X)) / nprocs)  # all procs work on an equal chunk
    try:  # try-catch hides bugs
        global proc_count
        old_proc_count = proc_count
        proc_count = nprocs
        p = Pool(nprocs)
        p.restart(force=True)
        retval_par = p.map( _spawn_fun, X, [f] * len(X), chunk_size=chunk_size)  # can throw if current proc is daemon
        p.terminate()
        for res_dict in retval_par:  # add all experiments params we missed
            curr_params_list = res_dict["curr_params_list"]
            for param in curr_params_list:
                cn.add_experiment_param(param)
        cn.experiment_purpose = retval_par[0][ "experiment_purpose"]  # use the "experiment_purpose" from the fork
        function_cache.merge_cache_dicts_from_parallel_runs(
            map(lambda a: a["functions_dict"], retval_par))  # merge all
        retval = map(lambda res_dict: res_dict["res"], retval_par)  # make it like the original map
        # NOTE(review): proc_count is restored only on the success path.
        proc_count = old_proc_count
        global i
        i += 1
    except AssertionError as e:
        if e.message == "daemonic processes are not allowed to have children":
            retval = map(f, X)  # can't have pool inside pool
        else:
            print "error message is: " + str(e.message)
            raise  # re-raise orig exception
    return retval
def create_sigmats_3_scales(dataset, no_sensors_cols, win_size_ls, normalize_each_seq=False, warm_up_time_points=''):
    """Build multi-scale signature matrices for every iteration in ``dataset``.

    Args:
        dataset: DataFrame of the data; the code reads its 'iter',
            'update_step', 'drone' and 'label' columns.
        no_sensors_cols (list): Columns that do not represent sensors; they
            are dropped, together with 'label', before conversion.
        win_size_ls (list): Window sizes to produce; each becomes one channel.
        normalize_each_seq: When True, every window is standardised with
            ``StandardScaler`` before conversion.
        warm_up_time_points: Not used by this function.

    Returns:
        Three lists with one entry per iteration:
        the signature-matrix arrays, the label arrays, and the
        ('drone', 'update_step', 'iter') key frames for later identification.
    """
    iter_ls = dataset.iter.unique()
    dropped_cols = no_sensors_cols + ['label']

    def create_sigmats_of_one_iter(dataset, iteri):
        # Rows of the current iteration and its update steps.
        dataset_iteri = dataset.loc[dataset['iter'] == iteri, :]
        update_step_ls = dataset_iteri.update_step.to_list()
        step_sig_mat_ls = []
        for update_step in update_step_ls:
            win_sig_mat_ls = []
            for win_size in win_size_ls:
                # Cut the frame to the window ending at the current step.
                current_seq = dataset_iteri.loc[
                    (dataset_iteri['update_step'] <= update_step)
                    & (dataset_iteri['update_step'] > (update_step - win_size))]
                # Drop irrelevant columns and convert to numpy. ``axis`` is
                # passed by keyword: pandas >= 2 rejects it positionally.
                current_seq = current_seq.drop(dropped_cols, axis=1).to_numpy()
                if normalize_each_seq:
                    current_seq = StandardScaler().fit_transform(current_seq)
                # One signature matrix per window size.
                win_sig_mat_ls.append(seq_to_sig_matrix(current_seq))
            step_sig_mat_ls.append(win_sig_mat_ls)
        iter_sig_mat_np = np.array(step_sig_mat_ls)
        # Move the window-size axis (axis 1) to the end so it acts as channels.
        iter_sig_mat_np = np.rollaxis(np.array(iter_sig_mat_np), 1, 4)
        return {
            'sig_mat': iter_sig_mat_np,
            'keys': dataset_iteri[['drone', 'update_step', 'iter']],
            'labels': dataset_iteri.label.to_numpy()
        }

    workers = multiprocessing.cpu_count()
    print('Number of workers: ', workers)
    pool = ProcessingPool(workers)
    try:
        list_of_iters_dict = pool.map(
            lambda iteri: create_sigmats_of_one_iter(dataset, iteri), iter_ls)
    finally:
        # Release the pool even when a worker fails.
        pool.close()
        pool.join()
        pool.terminate()
        pool.clear()

    iters_sig_mat_ls = [iter_dict['sig_mat'] for iter_dict in list_of_iters_dict]
    iters_lables_ls = [iter_dict['labels'] for iter_dict in list_of_iters_dict]
    iters_keys_ls = [iter_dict['keys'] for iter_dict in list_of_iters_dict]
    print(
        'shape of first iter X {} shape of first iter labels {} shape keys {}'.
        format(iters_sig_mat_ls[0].shape, iters_lables_ls[0].shape,
               iters_keys_ls[0].shape))
    return iters_sig_mat_ls, iters_lables_ls, iters_keys_ls
def main():
    """Extract the audio of the selected file and convert its speech regions to FLAC.

    Python 2 code. Returns 1 when the conversion is cancelled with Ctrl-C and
    0 otherwise. The temporary audio file is removed on the normal path.
    """
    # NOTE(review): this function reads ``self.filename`` and
    # ``self.progress1`` although it takes no ``self`` parameter -- presumably
    # it is nested inside a method of a GUI class; confirm against the
    # enclosing scope.
    parser = argparse.ArgumentParser()
    parser.add_argument( 'source_path', help="Path to the video or audio file to subtitle", nargs='?')
    parser.add_argument( '-C', '--concurrency', help="Number of concurrent API requests to make", type=int, default=10)
    parser.add_argument( '-o', '--output', help= "Output path for subtitles (by default, subtitles are saved in \ the same directory and name as the source path)" )
    parser.add_argument('-F', '--format', help="Destination subtitle format", default="srt")
    parser.add_argument('-S', '--src-language', help="Language spoken in source file", default="en")
    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles", default="en")
    parser.add_argument( '-K', '--api-key', help= "The Google Translate API key to be used. (Required for subtitle translation)" )
    parser.add_argument('--list-formats', help="List all available subtitle formats", action='store_true')
    parser.add_argument( '--list-languages', help="List all available source/destination languages", action='store_true')
    if (os.name == "posix"):
        # os.system returns the command's exit status; that status is what
        # gets printed here (uname writes its own output directly).
        print os.system("uname -a")
    else:
        print "unknown OS"
    args = parser.parse_args()
    # print "arguments",args
    # The source path from the command line is replaced by the GUI selection.
    args.source_path = str(self.filename)
    print args.source_path, "SOURCE PATH"
    # print "CONCURRENCY >>>", args.concurrency
    # print args
    # Swap the last three characters of the path for "srt".
    path = args.source_path[:-3]
    srt_path = path + "srt"
    print srt_path
    audio_filename, audio_rate = extract_audio(args.source_path)
    regions = find_speech_regions(audio_filename)
    pool = ProcessingPool(args.concurrency)
    converter = FLACConverter(source_path=audio_filename)
    transcripts = []
    if regions:
        try:
            widgets = [ "Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ', ETA() ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate( pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                # Both the console bar and the GUI bar follow the loop index.
                pbar.update(i)
                self.progress1.setValue(i)
            pbar.finish()
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print "Cancelling transcription"
            return 1
    os.remove(audio_filename)
    return 0
class RPKI_File:
    """Writes a prefix-origin table to a gzipped CSV in /tmp and serves it over HTTP.

    On construction the table is filled from ``table_input``, copied to a CSV
    file, line-counted and gzipped. ``spawn_process`` (or entering the context
    manager) starts an HTTP server for the directory in a pool process;
    ``close`` (or leaving the context manager) stops it and deletes the file.
    """

    __slots__ = ["path", "total_lines", "_process"]

    _dir = "/tmp/"
    hosted_name = "upo_csv_path.csv.gz"
    port = 8000

    def __init__(self, table_input):
        """Fills the table, dumps it to ``self.path`` and gzips the dump"""

        csv_name = self.hosted_name.replace(".gz", "")
        self.path = self._dir + csv_name
        with Unique_Prefix_Origins_Table(clear=True) as table:
            table.fill_table(table_input)
            table.copy_table(self.path)
            self.total_lines = utils.get_lines_in_file(self.path)
        self._gzip_file()

#################################
### Context Manager Functions ###
#################################

    def __enter__(self):
        """Starts serving the file and returns this object"""

        self.spawn_process()
        return self

    def __exit__(self, type, value, traceback):
        """Stops serving the file and cleans up"""

        self.close()

############################
### Serve File Functions ###
############################

    def spawn_process(self):
        """Frees the port, then launches the file server in a pool process"""

        utils.kill_port(self.port)
        self._process = ProcessingPool()
        self._process.apipe(self._serve_file)
        logging.debug("Served RPKI File")

    def close(self):
        """Frees the port, shuts the pool down and deletes the hosted file"""

        utils.kill_port(self.port, wait=False)
        pool = self._process
        pool.close()
        pool.terminate()
        pool.join()
        pool.clear()
        # absolute path of the gzipped file
        hosted_path = os.path.join(self._dir, self.hosted_name)
        utils.delete_paths(hosted_path)
        logging.debug("Closed RPKI File")

########################
### Helper Functions ###
########################

    def _gzip_file(self):
        """Compresses ``self.path`` into the hosted .gz file, then deletes the plain file"""

        hosted_path = os.path.join(self._dir, self.hosted_name)
        with open(self.path, 'rb') as plain:
            with gzip.open(hosted_path, 'wb') as compressed:
                for line in plain:
                    compressed.write(line)

        utils.delete_paths(self.path)

    def _serve_file(self):
        """Serves the contents of ``_dir`` over HTTP on the class port, forever"""

        class Handler(http.server.SimpleHTTPRequestHandler):
            """Plain request handler; adds nothing to the base class"""

        # The handler serves the current working directory, so move to _dir
        os.chdir(self._dir)
        server = socketserver.TCPServer(("", RPKI_File.port), Handler)
        server.serve_forever()
def compute_DTW_to_each_drone(drones_df_ls, win_size, no_sensors_cols, per_series=False, process_gps=True, use_scaler=True):
    """Compute pairwise DTW distances between drones for every update step.

    For each iteration and update step, the window of the last ``win_size``
    update steps is taken per drone and every unordered pair of drones is
    compared. Each pair is written twice, once from each drone's side.

    Args:
        drones_df_ls: List of DataFrames, concatenated into one dataset; the
            code reads the 'iter', 'update_step', 'drone' and 'label' columns.
        win_size: Window length in update steps.
        no_sensors_cols: Columns that are not sensor readings; dropped along
            with 'label' before the comparison.
        per_series: When True, DTW is computed per feature column with
            ``dtw_path`` and stored as an array; otherwise one distance is
            computed with ``dtw`` using a Sakoe-Chiba window of size 60.
        process_gps: Not used by this function.
        use_scaler: When True, each drone's window is standardised with
            ``StandardScaler`` first.

    Returns:
        DataFrame with columns 'iter', 'update_step', 'drone',
        'comparison_drone' and 'DTW_dist', sorted by iter, update step, drone.
    """
    print('Start compute DTW')
    dataset = pd.concat(drones_df_ls)
    dataset = dataset.sort_values(['iter', 'update_step', 'drone']).reset_index(drop=True)
    drones = dataset.drone.unique()
    numOfDrones = len(drones)
    start = time.time()
    iters = dataset.iter.unique()

    result_keys = ('iter', 'update_step', 'drone', 'comparison_drone', 'DTW_dist')
    dropped_cols = no_sensors_cols + ['label']

    def compute_DTW_on_iter(dataset, iteri, numOfDrones, drones, per_series=True):
        print('iter: ', iteri)
        dtw_results_dict = {key: [] for key in result_keys}
        dataset_iter = dataset.loc[dataset['iter'] == iteri, :]
        update_step_ls = dataset_iter.update_step.unique()
        # Number of features: all columns minus the non-sensor ones and label.
        num_of_features = dataset_iter.shape[1] - len(dropped_cols)

        def drone_matrix(current_seq, drone):
            # Sensor values of one drone inside the window, optionally scaled.
            # ``axis`` is passed by keyword: pandas >= 2 rejects it positionally.
            values = current_seq.loc[current_seq.drone == drone, :].drop(
                dropped_cols, axis=1).to_numpy()
            if use_scaler:
                return StandardScaler().fit_transform(values)
            return values

        def record(update_step, drone, comparison_drone, dist):
            # Append one result row.
            dtw_results_dict['iter'].append(iteri)
            dtw_results_dict['update_step'].append(update_step)
            dtw_results_dict['drone'].append(drone)
            dtw_results_dict['comparison_drone'].append(comparison_drone)
            dtw_results_dict['DTW_dist'].append(dist)

        for update_step in update_step_ls:
            # Cut the frame to the window ending at the current step.
            current_seq = dataset_iter.loc[
                (dataset_iter['update_step'] <= update_step)
                & (dataset_iter['update_step'] > (update_step - win_size))]
            for droneIidx in range(numOfDrones):
                currentDrone = drones[droneIidx]
                scaled_currentDroneNp = drone_matrix(current_seq, currentDrone)
                # Only pairs with J > I: a drone is never compared to itself
                # and every pair is computed once.
                for droneJidx in range(droneIidx + 1, numOfDrones):
                    otherDrone = drones[droneJidx]
                    scaled_otherDroneNp = drone_matrix(current_seq, otherDrone)
                    if per_series:
                        # One distance per feature column, returned as array.
                        dist = np.array([
                            dtw_path(scaled_currentDroneNp[:, i],
                                     scaled_otherDroneNp[:, i])[1]
                            for i in range(num_of_features)
                        ])
                    else:
                        dist = dtw(scaled_currentDroneNp,
                                   scaled_otherDroneNp,
                                   window_type="sakoechiba",
                                   window_args={'window_size': 60}).distance
                    # Save the result from both drones' point of view.
                    record(update_step, currentDrone, otherDrone, dist)
                    record(update_step, otherDrone, currentDrone, dist)
        print('iter done: ', iteri)
        return dtw_results_dict

    workers = multiprocessing.cpu_count()
    print('Number of workers: ', workers)
    # No more workers than iterations, but always at least one.
    workers = max(1, int(min(workers, len(iters))))
    pool = ProcessingPool(workers)
    try:
        list_of_iters_dict = list(
            pool.map(
                lambda iteri: compute_DTW_on_iter(dataset, iteri, numOfDrones,
                                                  drones, per_series), iters))
    finally:
        # Release the pool even when a worker fails.
        pool.close()
        pool.join()
        pool.terminate()
        pool.clear()

    # From a list of per-iteration dicts to one dict.
    dtw_results_dict = {key: [] for key in result_keys}
    for iter_results in list_of_iters_dict:
        for result_key, values in iter_results.items():
            dtw_results_dict[result_key].extend(values)
    print('time took: ', time.time() - start)

    dtw_results_df = pd.DataFrame.from_dict(dtw_results_dict)
    dtw_results_df = dtw_results_df.sort_values(
        ['iter', 'update_step', 'drone']).reset_index(drop=True)
    return dtw_results_df
"""
Example of a script for converting two video files in parallel

Additional dependencies:
    - pathos

Author(s) : Fabrice Zaoui (EDF R&D LNHE)

Copyright EDF 2018
"""

from sonaris import Sonaris
from pathos.multiprocessing import ProcessingPool as Pool


def run(video_list):
    """Run one conversion job.

    Despite its name, ``video_list`` is a single job object; only its
    ``convert()`` method is called.
    """
    video_list.convert()


# The guard keeps the pool from being created again when a worker process
# imports this module (required on platforms that spawn instead of fork).
if __name__ == "__main__":
    # ARIS files to convert and associated AVI file
    conversion_1 = Sonaris('video_test.aris', '2014_1.avi')
    conversion_2 = Sonaris('video_test.aris', '2014_2.avi')

    # list Sonaris jobs
    tab = [conversion_1, conversion_2]

    # use a number of processors (ideally one proc. per ARIS file)
    pool = Pool(nodes=2)
    try:
        # launch conversion
        pool.map(run, tab)
    finally:
        # close pool, also when a conversion fails
        pool.terminate()
        pool.join()
def main(): parser = argparse.ArgumentParser() parser.add_argument( 'source_path', help="Path to the video or audio file to subtitle", nargs='?') parser.add_argument( '-C', '--concurrency', help="Number of concurrent API requests to make", type=int, default=10) parser.add_argument( '-o', '--output', help= "Output path for subtitles (by default, subtitles are saved in \ the same directory and name as the source path)" ) parser.add_argument('-F', '--format', help="Destination subtitle format", default="srt") parser.add_argument('-S', '--src-language', help="Language spoken in source file", default="en") parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles", default="en") parser.add_argument( '-K', '--api-key', help= "The Google Translate API key to be used. (Required for subtitle translation)" ) parser.add_argument('--list-formats', help="List all available subtitle formats", action='store_true') parser.add_argument( '--list-languages', help="List all available source/destination languages", action='store_true') args = parser.parse_args() print args if (os.name == "posix"): args.source_path = str(self.filename) else: args.source_path = (str(self.filename)).replace("/", "\\") pas = (args.source_path).replace("/", "\\") args.source_path = pas print " Printing pas >>>", pas print args path = args.source_path[:-3] srt_path = path + "srt" if args.list_formats: print("List of formats:") for subtitle_format in FORMATTERS.keys(): print("{format}".format(format=subtitle_format)) return 0 if args.list_languages: print("List of all languages:") for code, language in sorted(LANGUAGE_CODES.items()): print("{code}\t{language}".format(code=code, language=languages)) return 0 if args.format not in FORMATTERS.keys(): print( "Subtitle format not supported. Run with --list-formats to see all supported formats." ) return 1 if args.src_language not in LANGUAGE_CODES.keys(): print( "Source language not supported. 
Run with --list-languages to see all supported languages." ) return 1 if args.dst_language not in LANGUAGE_CODES.keys(): print( "Destination language not supported. Run with --list-languages to see all supported languages." ) return 1 if not args.source_path: print("Error: You need to specify a source path.") return 1 audio_filename, audio_rate = extract_audio(args.source_path) regions = find_speech_regions(audio_filename) pool = ProcessingPool(args.concurrency) converter = FLACConverter(source_path=audio_filename) recognizer = SpeechRecognizer(language=args.src_language, rate=audio_rate, api_key=GOOGLE_SPEECH_API_KEY) transcripts = [] if regions: try: widgets = [ "Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ', ETA() ] pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() extracted_regions = [] for i, extracted_region in enumerate( pool.imap(converter, regions)): extracted_regions.append(extracted_region) pbar.update(i) self.progress1.setValue(i) pbar.finish() widgets = [ "Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA() ] pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() for i, transcript in enumerate( pool.imap(recognizer, extracted_regions)): transcripts.append(transcript) pbar.update(i) self.progress2.setValue(i) pbar.finish() QMessageBox.about(self, "Subtitles created", "Created at " + srt_path) if not is_same_language(args.src_language, args.dst_language): if args.api_key: google_translate_api_key = args.api_key translator = Translator(args.dst_language, google_translate_api_key, dst=args.dst_language, src=args.src_language) prompt = "Translating from {0} to {1}: ".format( args.src_language, args.dst_language) widgets = [ prompt, Percentage(), ' ', Bar(), ' ', ETA() ] pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() translated_transcripts = [] for i, transcript in enumerate( pool.imap(translator, transcripts)): translated_transcripts.append(transcript) pbar.update(i) 
self.progress2.setValue(i) pbar.finish() transcripts = translated_transcripts else: print "Error: Subtitle translation requires specified Google Translate API key. \See --help for further information." return 1 except KeyboardInterrupt: pbar.finish() pool.terminate() pool.join() print "Cancelling transcription" return 1 timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t] formatter = FORMATTERS.get(args.format) formatted_subtitles = formatter(timed_subtitles) dest = args.output if not dest: base, ext = os.path.splitext(args.source_path) dest = "{base}.{format}".format(base=base, format=args.format) with open(dest, 'wb') as f: f.write(formatted_subtitles.encode("utf-8")) print "Subtitles file created at {}".format(dest) os.remove(audio_filename) return 0
class RPKI_Validator_Wrapper:
    """Context manager that installs, runs and queries the RPKI validator.

    Entering the context starts the validator (and the RPKI_File that
    serves its input); leaving it tears both down. While inside the
    context, ``load_trust_anchors`` waits for validation to finish and
    ``get_validity_data`` fetches the results from the validator's API.
    """

    __slots__ = ['total_prefix_origin_pairs', "_process", "_table_input",
                 "_rpki_file"]

    # Sorry for the crazy naming scheme, must be done to avoid
    # having install file names in multiple locations
    temp_install_path = "/tmp/temp_rpki_validator_install"
    rpki_package_path = RPKI_PACKAGE_PATH
    rpki_run_name = RPKI_RUN_NAME
    rpki_run_path = RPKI_PACKAGE_PATH + RPKI_RUN_NAME
    rpki_db_paths = [RPKI_PACKAGE_PATH + x for x in ["db/", "rsync/"]]
    port = 8080
    # Derived from ``port`` so the two can never drift apart
    api_url = f"http://[::1]:{port}/api/"

    def __init__(self, **kwargs):
        """Configures logging and installs the validator if it is missing.

        Keyword args read here: ``stream_level`` (default logging.INFO),
        ``section`` and ``table_input`` (default "mrt_rpki"). All kwargs
        are forwarded to ``install`` when the package path does not exist.
        """

        config_logging(kwargs.get("stream_level", logging.INFO),
                       kwargs.get("section"))
        self._table_input = kwargs.get("table_input", "mrt_rpki")
        if not os.path.exists(self.rpki_package_path):
            logging.warning("Looks like validator is not installed")
            logging.warning("Installing validator now")
            RPKI_Validator_Wrapper.install(**kwargs)

#################################
### Context Manager Functions ###
#################################

    def __enter__(self):
        """Runs the RPKI Validator"""

        utils.kill_port(self.port)
        # Must remove these to ensure a clean run
        utils.clean_paths(self.rpki_db_paths)
        cmds = [f"cd {self.rpki_package_path}",
                f"chown -R root:root {self.rpki_package_path}"]
        utils.run_cmds(cmds)
        # Writes validator file and serves it
        # Can't use a context manager here since __enter__ must return
        # while the file is still being served; __exit__ closes it
        self._rpki_file = RPKI_File(self._table_input)
        self._rpki_file.spawn_process()
        # Start the validator asynchronously so __enter__ does not block
        self._process = ProcessingPool()
        self._process.apipe(self._start_validator)
        self.total_prefix_origin_pairs = self._rpki_file.total_lines
        return self

    def __exit__(self, type, value, traceback):
        """Closes RPKI Validator"""

        try:
            self._process.close()
            self._process.terminate()
            self._process.join()
            self._process.clear()
        finally:
            # Always release the port and the served file, even if the
            # pool teardown above raised
            utils.kill_port(self.port, wait=False)
            logging.debug("Closed rpki validator")
            self._rpki_file.close()

    def _start_validator(self):
        """Sends start cmd to RPKI Validator"""

        logging.info("Starting RPKI Validator")
        utils.run_cmds((f"cd {self.rpki_package_path} && "
                        f"./{self.rpki_run_name}"))

#########################
### Wrapper Functions ###
#########################

    def load_trust_anchors(self):
        """Blocks until every trust anchor reports completed validation"""

        utils.write_to_stdout(f"{datetime.now()}: Loading RPKI Validator\n",
                              logging.root.level)
        time.sleep(60)
        # Poll every 10 seconds, printing a dot per attempt
        while self._get_validation_status() is False:
            time.sleep(10)
            utils.write_to_stdout(".", logging.root.level)
        utils.write_to_stdout("\n", logging.root.level)
        self._wait(30, "Waiting for upload to bgp preview")

    def make_query(self, api_endpoint: str, data=True) -> dict:
        """Makes query to api of rpki validator

        Returns the "data" member of the JSON response when ``data`` is
        truthy, otherwise the whole response.
        """

        # URLs always use "/", so join explicitly rather than with os.path
        url = f"{self.api_url.rstrip('/')}/{api_endpoint.lstrip('/')}"
        result = utils.get_json(url, RPKI_Validator_Wrapper.get_headers())
        return result["data"] if data else result

    def get_validity_data(self) -> dict:
        """Gets the data from ripe and formats it for csv insertions"""

        logging.info("Getting data from ripe")
        # Todo for later, change 10mil to be total count
        page_size = 10000000
        assert self.total_prefix_origin_pairs < page_size, \
            "page size too small"
        # Then we get the data from the ripe RPKI validator
        return self.make_query(f"bgp/?pageSize={page_size}")

########################
### Helper Functions ###
########################

    def _wait(self, time_to_sleep: int, msg: str):
        """Logs a message and waits ``time_to_sleep`` seconds

        A tqdm progress bar is shown only when the root logger is at
        INFO; at every other level the wait is a plain sleep.
        """

        logging.debug(msg)
        if logging.root.level == logging.INFO:
            # Number of times per second to update tqdm
            divisor = 100
            for _ in trange(time_to_sleep * divisor, desc=msg):
                time.sleep(1 / divisor)
        else:
            # Previously nothing slept here, so the wait was skipped
            # whenever the log level was anything other than INFO
            time.sleep(time_to_sleep)

    def _get_validation_status(self) -> bool:
        """Returns True once all trust anchors have completed validation

        Returns False if any trust anchor is still validating, or if the
        query raised a URLError (after waiting 60 seconds).
        """

        try:
            for x in self.make_query("trust-anchors/statuses"):
                if x["completedValidation"] is False:
                    # If anything has not been validated return false
                    return False
            # All are validated. Return true
            return True
        except urllib.error.URLError:
            self._wait(60, "Connection was refused")
            return False

######################
### Static methods ###
######################

    @staticmethod
    def get_validity_dict() -> dict:
        """Returns the validity dict for the RPKI Validator to decode results

        I could have this as a class attribute but too messy I think.
        """

        return {"VALID": ROA_Validity.VALID.value,
                "UNKNOWN": ROA_Validity.UNKNOWN.value,
                "INVALID_LENGTH": ROA_Validity.INVALID_BY_LENGTH.value,
                "INVALID_ASN": ROA_Validity.INVALID_BY_ORIGIN.value}

    @staticmethod
    def get_headers() -> dict:
        """Gets the headers for all url queries to the validator"""

        return {"Connection": "keep-alive",
                "Cache-Control": "max-age=0",
                # Header values are text on the wire; keep it a str so
                # any HTTP client accepts it
                "Upgrade-Insecure-Requests": "1",
                "User-Agent": ("Mozilla/5.0 (X11; Linux x86_64)"
                               " AppleWebKit/537.36 (KHTML, like Gecko) "
                               "Chrome/73.0.3683.86 Safari/537.36"),
                "Accept": ("text/html,application/xhtml+xml,"
                           "application/xml;q=0.9,image/webp,"
                           "image/apng,*/*;q=0.8,"
                           "application/signed-exchange;v=b3"),
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "en-US,en;q=0.9"}

#########################
### Install Functions ###
#########################

    @staticmethod
    def install(**kwargs):
        """Installs RPKI validator with our configs.

        This might break in the future, but we need to do it this way
        for now to be able to do what we want with our own prefix origin
        table.
        """

        config_logging(kwargs.get("stream_level", logging.DEBUG),
                       kwargs.get("section"))
        # Start from a clean slate: drop any previous install and temp dir
        utils.delete_paths([RPKI_Validator_Wrapper.rpki_package_path,
                            RPKI_Validator_Wrapper.temp_install_path])
        RPKI_Validator_Wrapper._download_validator()
        RPKI_Validator_Wrapper._change_file_hosted_location()
        path = RPKI_Validator_Wrapper._change_server_address()
        RPKI_Validator_Wrapper._config_absolute_paths(path)

    @staticmethod
    def _download_validator():
        """Downloads validator into proper location"""

        rpki_url = ("https://ftp.ripe.net/tools/rpki/validator3/beta/generic/"
                    "rpki-validator-3-latest-dist.tar.gz")
        arin_tal = ("https://www.arin.net/resources/manage/rpki/"
                    "arin-ripevalidator.tal")
        # This is the java version they use so we will use it
        cmds = [f"mkdir {RPKI_Validator_Wrapper.temp_install_path}",
                f"cd {RPKI_Validator_Wrapper.temp_install_path}",
                "sudo apt-get -y install openjdk-8-jre",
                f"wget {rpki_url}",
                "tar -xvf rpki-validator-3-latest-dist.tar.gz",
                "rm -rf rpki-validator-3-latest-dist.tar.gz",
                f"mv rpki-validator* {RPKI_Validator_Wrapper.rpki_package_path}",
                f"cd {RPKI_Validator_Wrapper.rpki_package_path}",
                "cd preconfigured-tals",
                f"wget {arin_tal}"]
        utils.run_cmds(cmds)

    @staticmethod
    def _change_file_hosted_location():
        """Changes location of input ann for bgp preview file"""

        # Changes where the file is hosted
        path = (f"{RPKI_Validator_Wrapper.rpki_package_path}conf"
                "/application-defaults.properties")
        prepend = "rpki.validator.bgp.ris.dump.urls="
        replace = ("https://www.ris.ripe.net/dumps/riswhoisdump.IPv4.gz,"
                   "https://www.ris.ripe.net/dumps/riswhoisdump.IPv6.gz")
        replace_with = (f"http://localhost:{RPKI_File.port}"
                        f"/{RPKI_File.hosted_name}")
        utils.replace_line(path, prepend, replace, replace_with)

    @staticmethod
    def _change_server_address():
        """Prob because of a proxy, but on our server this is necessary

        Returns the path of the properties file that was edited.
        """

        # Changes the server address
        path = (f"{RPKI_Validator_Wrapper.rpki_package_path}conf"
                "/application.properties")
        prepend = "server.address="
        replace = "localhost"
        replace_with = "0.0.0.0"
        utils.replace_line(path, prepend, replace, replace_with)
        return path

    @staticmethod
    def _config_absolute_paths(path):
        """Configure rpki validator to run off absolute paths

        This is necessary due to script being called from elsewhere
        In other words not from inside the RPKI dir.
        """

        # Since I am calling the script from elsewhere these must be
        # absolute paths
        prepend = "rpki.validator.data.path="
        replace = "."
        # Must remove the trailing slash at the end
        replace_with = RPKI_Validator_Wrapper.rpki_package_path[:-1]
        utils.replace_line(path, prepend, replace, replace_with)

        prepend = "rpki.validator.preconfigured.trust.anchors.directory="
        replace = "./preconfigured-tals"
        replace_with = (f"{RPKI_Validator_Wrapper.rpki_package_path}"
                        "preconfigured-tals")
        utils.replace_line(path, prepend, replace, replace_with)

        prepend = "rpki.validator.rsync.local.storage.directory="
        replace = "./rsync"
        replace_with = f"{RPKI_Validator_Wrapper.rpki_package_path}rsync"
        utils.replace_line(path, prepend, replace, replace_with)