Code Example #1
    def multi_Non_Tweep_friends(self, handle):
        min_position, links = self.get_tweets(handle)
        print("Scraping last 100 days of activity")

        while (True):
            min_position1, links1 = self.get_tweets(handle, min_position)
            links = links + links1
            if (min_position1 == None):
                break
            min_position = min_position1

        people_list = []

        link = [x for x in links if handle in x]
        link = self.duplicates(link)

        with Pool(10) as p:  # Pool tells how many at a time

            records = list(tqdm(p.imap(self.get_people, link),
                                total=len(link)))
            p.terminate()
            p.join()
            p.close()
            people_list = [item for sublist in records for item in sublist]
            people_list = self.duplicates(people_list)

        people_list = [x for x in people_list if x != handle]

        return (people_list)
Code Example #2
File: cloudium.py Project: june5079/cloudium
    def certScanner (self) :
        p = Pool(nodes = 512)
        cprint ("[+] Keywords : " + " ".join(str(x) for x in self.keywordList), 'green')
        # self.allipAddrList = self.shuffleList()
        self.allipAddrList = [x for x in self.shuffleList() if self.region in x ]
        
        for self.tryipClass in self.allipAddrList:
            self.ipExtractResult = self.ipExtract(self.tryipClass.split("@")[0])
            _max = len(self.ipExtractResult)
            cprint ("[+] Scanning IP Addr Class : " + self.tryipClass + "\t-- Number of scan target is :" + str(len(self.ipExtractResult)), 'green')

            with tqdm(total=_max) as pbar:
                pbar.set_description("[+] Progressing : %s " %self.tryipClass)
                for i, domain in tqdm(enumerate(p.imap(self.certChecker, self.ipExtractResult))):
                    pbar.update()
                    if domain is not None:
                        self.resList.append(domain)
                pbar.close()
                p.terminate() # Like p.close()
                p.restart() # Like p.join()

            if self.resList:
                self.printRes()

            else:
                cprint ("[!] No kewords found on this IP class \n", 'red')

            time.sleep(1)
            self.ipExtractResult = []
            self.resList = []
Code Example #3
def extract_hits(bins_to_contig_lists, outdir, contig_file, threads):
    p = Pool(threads)

    pullseq_tmp = os.path.join(outdir, 'pullseq_ids_tmp')
    if not os.path.exists(pullseq_tmp):
        os.system('mkdir ' + pullseq_tmp)

    def pullseq_by_bin(bin_name, contig_list, contig_file):
        #Generates a file with the names of all the contigs to pull out
        #then provides that to pullseq;
        #parses the resulting fasta output from pullseq and then
        #passes it back.
        with open(os.path.join(pullseq_tmp, bin_name + '.txt'),
                  'w') as outfile:
            for element in contig_list:
                outfile.writelines(element + '\n')

        os.system('pullseq -i ' + contig_file + ' -n ' +
                  os.path.join(pullseq_tmp, bin_name + '.txt') + ' > ' +
                  os.path.join(outdir, bin_name + '.fasta'))

        return

    p.map(lambda x: pullseq_by_bin(x, bins_to_contig_lists[x], contig_file),
          bins_to_contig_lists)
    #for bin in bins_to_contig_lists:
    #    pullseq_by_bin(bin, bins_to_contig_lists[bin], contig_file)

    os.system('rm -rf ' + pullseq_tmp)
    p.terminate()
    return
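A minimal usage sketch for the function above, assuming a hypothetical bin-to-contig mapping and hypothetical file names; the pullseq binary must be installed and on PATH for the shelled-out commands to succeed.

# Hypothetical inputs for illustration only.
example_bins = {
    "bin_001": ["contig_1", "contig_7"],
    "bin_002": ["contig_3"],
}
extract_hits(example_bins, outdir="output_dir", contig_file="assembly.fasta", threads=4)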
Code Example #4
class ConsensusMHSampler(MHSampler):
	def __init__(self, log_f, log_g, g_sample, x0, iterations, shards=1):
		super(ConsensusMHSampler, self).__init__(log_f, log_g, g_sample, x0, iterations)
		self.shards = shards

		assert len(self.log_distribution_fn) == self.shards
		self.log_fn_dict = {} # for pickling purposes
		for i in range(self.shards):
			self.log_fn_dict[i] = self.log_distribution_fn[i]

		self.pool = Pool(nodes=self.shards)

	def sample(self):
		map_results = self.pool.map(self.map_sample, range(self.shards))
		self.pool.close()
		self.pool.join()
		self.pool.terminate()
		self.pool.restart()
		self.saved_states = self.reduce_sample(map_results)

	def map_sample(self, index):
		np.random.seed(1)
		cur_state = self.start_state
		sample_results = [cur_state]
		prob, count = 0, 0

		for i in range(self.iterations):
			if i % 5000 == 0:
				print("iteration {}".format(i))
			candidate_state = self.get_transition_sample(cur_state)
			acceptance = self.calculate_acceptance_ratio(candidate_state, self.log_fn_dict[index])
			prob += acceptance
			count += 1

			new_state = self.transition_step(cur_state, candidate_state, acceptance)
			sample_results.append(new_state)
			cur_state = new_state
		sample_results = np.array(sample_results)

		print("INDEX {}: Avg acceptance prob is {}".format(index, prob/count))

		return (sample_results, 1.0 / (1e-8 + self.get_sample_variance(sample_results)))

	def get_sample_variance(self, data):
		return np.linalg.norm(np.var(np.array(data), axis=0))

	def reduce_sample(self, results):
		'''
			results is a list of (sample_array, weight) tuples
		'''
		sample_results = 0
		total_weight = 0
		for sample, weight in results:
			sample_results += weight * sample
			total_weight += weight

		return sample_results / total_weight
Code Example #5
def parallelize_dataframe(df: pd.DataFrame, func, n_cores=4) -> pd.DataFrame:
    df_split = np.array_split(df, n_cores)
    pool = Pool(n_cores)
    df = pd.concat(pool.map(func, df_split))
    pool.close()
    pool.join()
    # have to include this to prevent leakage and allow multiple parallel function calls
    pool.terminate()
    pool.restart()
    return df
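A minimal usage sketch for the helper above, assuming a hypothetical per-chunk function add_double; any function that accepts and returns a DataFrame works.

import numpy as np
import pandas as pd

def add_double(chunk: pd.DataFrame) -> pd.DataFrame:
    # Hypothetical per-chunk transformation.
    chunk["double"] = chunk["value"] * 2
    return chunk

df = pd.DataFrame({"value": np.arange(100)})
result = parallelize_dataframe(df, add_double, n_cores=2)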
Code Example #6
def _multiprocess2D(func, args_array, ncores=4, display=True):
    '''
    Multipurpose parallel processing

    Takes a function and an array of arguments, evaluates the function with the given
    arguments for each point, processing in parallel using ncores number of parallel
    processes.

    WARNING: needs to be protected by a if __name__ == "__main__" block or else
    multiprocessing.pool will have problems.

    Args:
        func : The function to evaluate, can only accept one argument but it can be a list
            or tuple
        args_array : The array of arguments to the input function func.
        ncores : The number of nodes to pass to multiprocessing.Pool
        display : Will display progress if true.

    Returns:
        The results of the calculation as a numpy ndarray.
    '''
    pool = Pool(nodes=ncores)
    rows = len(args_array)
    cols = len(args_array[0])
    output = np.zeros((rows, cols))
    if rows > 10:
        disp_rows = np.arange(rows / 10, rows, rows / 10)
    else:
        disp_rows = np.arange(1, rows, 1)
    if display:
        print("Parallel Processing Started with " + str(ncores) +
              " subprocesses")
    t0 = timer()
    for i in range(rows):
        worker_args = []
        for j in range(cols):
            worker_args.append(args_array[i][j])
        try:
            out = pool.map(func, worker_args)
            for j in range(cols):
                output[i, j] = out[j]
            if display and i in disp_rows:
                print(str(round(100 * i / float(rows))) + "% Complete")
        except Exception as e:
            print("Exception in _multiprocessing2D: Cannot Process")
            print("_multiprocessing2D: Exiting Process Early")
            pool.terminate()
            raise e
    tf = timer()
    if display:
        print(" ")
        dt = tf - t0
        print("Computations Completed in: " +
              str(datetime.timedelta(seconds=dt)))
    return output
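A minimal sketch of how _multiprocess2D might be called, assuming a toy single-argument function (hypot_sq) and a small 2D argument grid; as the docstring warns, the call should run under an if __name__ == "__main__" guard.

def hypot_sq(args):
    x, y = args  # single argument that happens to be a tuple
    return x * x + y * y

if __name__ == "__main__":
    grid = [[(i, j) for j in range(4)] for i in range(3)]
    result = _multiprocess2D(hypot_sq, grid, ncores=2, display=False)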
Code Example #7
File: utils.py Project: hoidn/packages
def parallelmap(func, data, nodes = None):
    """
    Return the averaged signal and background (based on blank frames) over the given runs
    """
    if not nodes:
        nodes = multiprocessing.cpu_count() - 2
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, data)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
Code Example #8
File: utils.py Project: hoidn/utils
def parallelmap(func, lst, nodes = None):
    """
    Return the averaged signal and background (based on blank frames) over the given runs using
    multiprocessing (as opposed to MPI).
    """
    from pathos.multiprocessing import ProcessingPool
    from pathos import multiprocessing
    if not nodes:
        nodes = multiprocessing.cpu_count() - 2
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, lst)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
Code Example #9
def parallelmap(func, lst, nodes=None):
    """
    Return the averaged signal and background (based on blank frames) over the given runs using
    multiprocessing (as opposed to MPI).
    """
    from pathos.multiprocessing import ProcessingPool
    from pathos import multiprocessing
    if not nodes:
        nodes = multiprocessing.cpu_count() - 2
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, lst)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
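A minimal usage sketch for parallelmap above (the docstring appears copied from another routine; the helper itself is a general-purpose parallel map), assuming a toy worker function.

def double(x):
    return 2 * x

doubled = parallelmap(double, list(range(8)), nodes=2)  # -> [0, 2, 4, 6, 8, 10, 12, 14]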
Code Example #10
File: stalker.py Project: abhidya/Rewteet_graph
    def multi_get_followers_location(self, followers_ids, amount=-1,
                                     workers=10):  # takes a list of screen names, returns a dict of location counts
        locations = {}
        if amount != -1:
            followers_ids = random.sample(followers_ids, amount)

        p = Pool(workers)  # Pool tells how many at a time
        records = p.map(self.get_loc, followers_ids)
        p.terminate()
        p.join()
        print(records)

        for i in records:
            if i not in locations:
                locations[i] = 0
            locations[i] = locations[i] + 1

        return locations
Code Example #11
File: annotation.py Project: WingCHWang/KG-Tools
        def wrapper(*args, **kwargs):

            obj, data, _args = tuple(), tuple(), tuple()
            if hasattr(args[0].__class__, fn.__name__):
                obj, data, *_args = args
                obj = (obj, )
            else:
                data, *_args = args

            if type(data) != list:
                data = list(data)

            total_size = len(data)
            _batch_size = total_size // workers + 1 if batch_size is None else batch_size
            # assert type(data) == list, "Type of data must be list"
            print(
                f"@Parallel[workers={workers}, data_size={total_size}, batch_size={_batch_size}]: parallel for {fn.__qualname__}."
            )

            if shuffle:
                print(
                    f"@Parallel[workers={workers}, data_size={total_size}, batch_size={_batch_size}]: shuffle data for {fn.__qualname__}."
                )
                random.shuffle(data)

            pool = Pool(workers)
            pool.terminate()
            pool.restart()

            proc = []
            for beg, end in zip(
                    range(0, total_size, _batch_size),
                    range(_batch_size, total_size + _batch_size, _batch_size)):
                batch = data[beg:end]
                p = pool.apipe(fn, *obj, batch, *_args, **kwargs)
                proc.append(p)
            pool.close()
            pool.join()

            result = reduce_seqs([p.get() for p in proc])
            if after_hook is not None:
                result = after_hook(result)

            return result
Code Example #12
    def parallel_eval(envs, eval_func, process_n=FLAGS.n_actors):
        # prepare the params for creating the agent and splitting the envs
        env_split_size = len(envs) / process_n

        envs_tasks = []
        for i in range(process_n):
            process_envs = envs[i * env_split_size:(i + 1) * env_split_size]
            envs_tasks.append(process_envs)

        # distributed evaluation and pick the highest scored examples within budget
        print('Started distributed evaluation with %d processes...' %
              process_n)
        evaluation_pool = Pool(FLAGS.n_actors)
        all_example_eval_results = evaluation_pool.map(eval_func, envs_tasks)
        evaluation_pool.close()
        evaluation_pool.terminate()
        all_example_eval_results = reduce(lambda x, y: x + y,
                                          all_example_eval_results)
        print('Finished distributed evaluation.')

        return all_example_eval_results
Code Example #13
    def transform_saveVoxelFiles(self, cates="", source_filename = "model_normalized.obj", \
                             dest_filename="model_normalized.mat", dim=64, multiprocess=4, dest_samedir=True, dest_dir=""):
        """
        Use a map function to generate voxel models; you may only need this once, and the transformation will take a long time.
        If dest_samedir is False, then dest_dir should be given.
        """
        if not dest_samedir:
            if dest_dir == "":
                self.warn(
                    "Destination directory not given, use default dest_dir which will under current dir"
                )
                dest_dir = "./voxelModels"
            dest_dir = os.path.abspath(dest_dir)
            if not os.path.isdir(dest_dir):
                os.mkdir(dest_dir)

        # Use multiple processes to transform the models.
        # Will only accept mesh models because we check source_file existence;
        # this only works when .obj files are present.
        model_paths = [os.path.join(p, source_filename) for p in self.get_flattenAbsModelDir(cates) \
                       if os.path.isfile(os.path.join(p, source_filename)) and source_filename.endswith(".obj") ]
        self.info("Done model path building")

        if multiprocess > 1:
            # If the package is not given, we will not be able to use this multiprocessing
            ProcessPool = Pool(multiprocess)
            ProcessPool.map(
                lambda x: self.transform_saveVoxelFile(
                    x, dim, dest_samedir, dest_filename, dest_dir),
                model_paths)
            ProcessPool.close()
            ProcessPool.join()
            ProcessPool.terminate()
        else:
            # Use only one thread to process mesh model to voxel model
            for c, path in enumerate(self.random_permutation(model_paths)):
                self.transform_saveVoxelFile(path, dim, dest_samedir,
                                             dest_filename, dest_dir)
                self.info("Process: {0}/{1}".format(c + 1, len(model_paths)))
Code Example #14
    def annotate_example_decode(self, envs, eval_func, process_n=5):
        # prepare the params for creating the agent and splitting the envs
        env_split_size = len(envs) / process_n

        envs_tasks = []
        for i in range(process_n):
            process_envs = envs[i * env_split_size:(i + 1) * env_split_size]
            envs_tasks.append(process_envs)

        # distributed evaluation and pick the highest scored examples within budget
        print('Started distributed sketch annotation with %d processes...' %
              process_n)
        evaluation_pool = Pool(process_n)
        all_example_eval_results = evaluation_pool.map(eval_func, envs_tasks)
        evaluation_pool.close()
        evaluation_pool.terminate()
        print('Finished distributed annotation.')

        # combine the results
        all_result_dict = dict()
        for result in all_example_eval_results:
            all_result_dict.update(result)

        return all_result_dict
Code Example #15
File: parallel.py Project: pltrdy/autoalign
def parallel_map(func, array, n_workers):
    def compute_batch(i):
        try:
            return func(i)
        except KeyboardInterrupt:
            raise RuntimeError("Keyboard interrupt")

    p = Pool(n_workers)
    err = None
    # pylint: disable=W0703,E0702
    # some bs boilerplate from StackOverflow
    try:
        return p.map(compute_batch, array)
    except KeyboardInterrupt as e:
        print('got ^C while pool mapping, terminating the pool')
        p.terminate()
        err = e
    except Exception as e:
        print('got exception: %r:, terminating the pool' % (e, ))
        p.terminate()
        err = e

    if err is not None:
        raise err
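A minimal usage sketch for parallel_map above, assuming a toy worker; a Ctrl-C during the map terminates the pool and the error is re-raised.

def slow_square(x):
    return x * x

squares = parallel_map(slow_square, list(range(100)), n_workers=4)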
Code Example #16
sufficient_size = cluster_length_df[cluster_length_df.length >= 500000]

#os.chdir(vambdir)
os.system('mkdir ' + os.path.join(vambdir, 'fastas'))
os.system('mkdir ' + os.path.join(vambdir, 'idfiles'))

good_clusters = sufficient_size.cluster.tolist()
good_clusters_df = clusters_df[clusters_df.cluster_id.isin(good_clusters)]

good_clusters_df.seqid = good_clusters_df.seqid.apply(lambda x: x.split('_read_length')[0])
idfiles_dir = os.path.join(vambdir, 'idfiles')

for cluster in good_clusters:
    try:
        cluster_id = int(cluster.split('_')[-1])
    except:
        cluster_id = int(cluster)
    this_cluster_df = good_clusters_df[good_clusters_df.cluster_id == cluster].copy()
    with open(os.path.join(idfiles_dir, 'vamb_bin_' + str(cluster) + '.seqids.txt'), 'w') as outfile:
        [outfile.writelines(element + '\n') for element in this_cluster_df.seqid.unique().tolist()]

idfiles = list(map(lambda x: os.path.join(idfiles_dir, x), os.listdir(idfiles_dir)))

for idfile in idfiles:
    bin_name = idfile.split('.seqids')[0].split('/')[-1]
    os.system('cat ' + idfile + ' | pullseq -i ' + scaffolds + ' -N > ' + os.path.join(vambdir, 'fastas') + '/' +  bin_name + '.fna')

p.terminate()
sys.exit(420)
Code Example #17
def parmap(f,
           X,
           nprocs=multiprocessing.cpu_count(),
           force_parallel=False,
           chunk_size=1):
    from ResearchNLP import Constants as cn
    from ResearchNLP.util_files import function_cache

    if len(X) == 0:
        return []  # like map

    # nprocs = min(nprocs, cn.max_procs)
    if nprocs != multiprocessing.cpu_count() and len(X) < nprocs * chunk_size:
        chunk_size = 1  # use chunk_size = 1 if there is enough procs for a batch size of 1
    nprocs = max(1, min(nprocs, len(X) / chunk_size))  # at least 1
    if len(X) < nprocs:
        if cn.verbose and nprocs != multiprocessing.cpu_count():
            print "parmap too much procs"
        nprocs = len(X)  # too much procs

    if nprocs == 1 or (cn.serial_parmap and not force_parallel
                       ):  # we want it serial (maybe for profiling)
        return map(f, X)

    def _spawn_fun(input, func):
        import random, numpy
        from ResearchNLP import Constants as cn2
        from ResearchNLP.util_files import function_cache as function_cache2
        random.seed(1554 + i)
        numpy.random.seed(42 + i)  # set random seeds
        try:
            res = func(input)
            res_dict = dict()
            res_dict["res"] = res
            res_dict["functions_dict"] = function_cache2.caches_dicts
            res_dict["experiment_purpose"] = cn2.experiment_purpose
            res_dict["curr_params_list"] = cn2.curr_experiment_params_list
            return res_dict
        except:
            import traceback
            traceback.print_exc()
            raise  # re-raise exception

    # if chunk_size == 1:
    #     chunk_size = math.ceil(float(len(X)) / nprocs)  # all procs work on an equal chunk

    try:  # try-catch hides bugs
        global proc_count
        old_proc_count = proc_count
        proc_count = nprocs
        p = Pool(nprocs)
        p.restart(force=True)
        retval_par = p.map(
            _spawn_fun, X, [f] * len(X),
            chunk_size=chunk_size)  # can throw if current proc is daemon
        p.terminate()
        for res_dict in retval_par:  # add all experiments params we missed
            curr_params_list = res_dict["curr_params_list"]
            for param in curr_params_list:
                cn.add_experiment_param(param)
        cn.experiment_purpose = retval_par[0][
            "experiment_purpose"]  # use the "experiment_purpose" from the fork
        function_cache.merge_cache_dicts_from_parallel_runs(
            map(lambda a: a["functions_dict"], retval_par))  # merge all
        retval = map(lambda res_dict: res_dict["res"],
                     retval_par)  # make it like the original map
        proc_count = old_proc_count
        global i
        i += 1
    except AssertionError as e:
        if e.message == "daemonic processes are not allowed to have children":
            retval = map(f, X)  # can't have pool inside pool
        else:
            print "error message is: " + str(e.message)
            raise  # re-raise orig exception
    return retval
Code Example #18
def create_sigmats_3_scales(dataset,
                            no_sensors_cols,
                            win_size_ls,
                            normalize_each_seq=False,
                            warm_up_time_points=''):
    """recives df of the data,
     no_sensors_cols (ls): the columns that doesnt represent sensors
     win_size_ls (ls): win sizes to produce (each one will be a channel in reverse order)
     warm_up_time_points

    returns list of  representations (sigmat) with n  dim (number of channels) for each scale, for each iter
    - X(PADED TO THE MAX LENGTH) shape = (num of seqs, length of seq, num of sensors/features)
    - y and
    - keys ('drone', 'update_step', 'iter') for later identification """

    # compute y - if one of the recorsed is anomaly, all the sequnce classified as anomaly
    iter_ls = dataset.iter.unique()

    def create_sigmats_of_one_iter(dataset, iteri):
        # get current iter
        dataset_iteri = dataset.loc[dataset['iter'] == iteri, :]
        # get list of update steps
        update_step_ls = dataset_iteri.update_step.to_list()
        step_sig_mat_ls = []

        for update_step in update_step_ls:
            # print('iter: ',iteri,'step: ', update_step)

            win_sig_mat_ls = []
            for win_size in win_size_ls:
                # cut the df by current update step-win size
                current_seq = dataset_iteri.loc[
                    (dataset_iteri['update_step'] <= update_step) &
                    (dataset_iteri['update_step'] > (update_step - win_size))]
                # drop irrelevant cols and convert to numpy
                current_seq = current_seq.drop(no_sensors_cols + ['label'],
                                               1).to_numpy()
                if normalize_each_seq:
                    current_seq = StandardScaler().fit_transform(current_seq)
                # convert to sig mat
                current_seq_sig_mat = seq_to_sig_matrix(current_seq)
                # add to the ls - each element with a different win size
                win_sig_mat_ls.append(current_seq_sig_mat)

            # stack the 3 win sizes (scales) together as channels
            # stacked_mats_different_scale = np.stack(win_sig_mat_ls)
            # add to step ls
            step_sig_mat_ls.append(win_sig_mat_ls)
        # stack all steps
        # stacked_mats_of_iter = np.stack(step_sig_mat_ls)
        # add to iter ls
        iter_sig_mat_np = np.array(step_sig_mat_ls)
        iter_sig_mat_np = np.rollaxis(np.array(iter_sig_mat_np), 1, 4)
        return {
            'sig_mat': iter_sig_mat_np,
            'keys': dataset_iteri[['drone', 'update_step', 'iter']],
            'labels': dataset_iteri.label.to_numpy()
        }

    workers = multiprocessing.cpu_count()
    print('Number of workers: ', workers)
    pool = ProcessingPool(workers)
    list_of_iters_dict = pool.map(
        lambda iter: create_sigmats_of_one_iter(dataset, iter), iter_ls)
    pool.close()
    pool.join()
    pool.terminate()
    pool.clear()

    iters_sig_mat_ls = [
        iter_dict['sig_mat'] for iter_dict in list_of_iters_dict
    ]
    iters_lables_ls = [iter_dict['labels'] for iter_dict in list_of_iters_dict]
    iters_keys_ls = [iter_dict['keys'] for iter_dict in list_of_iters_dict]

    print(
        'shape of first iter X {} shape of first iter labels {} shape keys {}'.
        format(iters_sig_mat_ls[0].shape, iters_lables_ls[0].shape,
               iters_keys_ls[0].shape))

    return iters_sig_mat_ls, iters_lables_ls, iters_keys_ls
Code Example #19
        def main():
            parser = argparse.ArgumentParser()
            parser.add_argument(
                'source_path',
                help="Path to the video or audio file to subtitle",
                nargs='?')
            parser.add_argument(
                '-C',
                '--concurrency',
                help="Number of concurrent API requests to make",
                type=int,
                default=10)
            parser.add_argument(
                '-o',
                '--output',
                help=
                "Output path for subtitles (by default, subtitles are saved in \ the same directory and name as the source path)"
            )
            parser.add_argument('-F',
                                '--format',
                                help="Destination subtitle format",
                                default="srt")
            parser.add_argument('-S',
                                '--src-language',
                                help="Language spoken in source file",
                                default="en")
            parser.add_argument('-D',
                                '--dst-language',
                                help="Desired language for the subtitles",
                                default="en")
            parser.add_argument(
                '-K',
                '--api-key',
                help=
                "The Google Translate API key to be used. (Required for subtitle translation)"
            )
            parser.add_argument('--list-formats',
                                help="List all available subtitle formats",
                                action='store_true')
            parser.add_argument(
                '--list-languages',
                help="List all available source/destination languages",
                action='store_true')

            if (os.name == "posix"):
                print os.system("uname -a")
            else:
                print "unknown OS"

            args = parser.parse_args()
            # print "arguments",args
            args.source_path = str(self.filename)
            print args.source_path, "SOURCE PATH"
            # print "CONCURRENCY >>>", args.concurrency
            # print args
            path = args.source_path[:-3]
            srt_path = path + "srt"
            print srt_path

            audio_filename, audio_rate = extract_audio(args.source_path)
            regions = find_speech_regions(audio_filename)
            pool = ProcessingPool(args.concurrency)
            converter = FLACConverter(source_path=audio_filename)

            transcripts = []
            if regions:
                try:
                    widgets = [
                        "Converting speech regions to FLAC files: ",
                        Percentage(), ' ',
                        Bar(), ' ',
                        ETA()
                    ]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()
                    extracted_regions = []
                    for i, extracted_region in enumerate(
                            pool.imap(converter, regions)):
                        extracted_regions.append(extracted_region)
                        pbar.update(i)
                        self.progress1.setValue(i)
                    pbar.finish()

                except KeyboardInterrupt:
                    pbar.finish()
                    pool.terminate()
                    pool.join()
                    print "Cancelling transcription"
                    return 1

            os.remove(audio_filename)

            return 0
Code Example #20
File: rpki_file.py Project: jfuruness/lib_bgp_data
class RPKI_File:
    """This class gets validity data from ripe"""

    __slots__ = ["path", "total_lines", "_process"]

    _dir = "/tmp/"
    hosted_name = "upo_csv_path.csv.gz"
    port = 8000

    def __init__(self, table_input):
        """Downloads and stores roas from a json"""

        self.path = self._dir + self.hosted_name.replace(".gz", "")
        with Unique_Prefix_Origins_Table(clear=True) as _db:
            _db.fill_table(table_input)
            _db.copy_table(self.path)
            self.total_lines = utils.get_lines_in_file(self.path)
            self._gzip_file()

#################################
### Context Manager Functions ###
#################################

    def __enter__(self):
        """What to do when the context manager is called on this class

        Starts the process for serving the file"""

        self.spawn_process()
        return self

    def __exit__(self, type, value, traceback):
        """Closes the file process"""

        self.close()

############################
### Serve File Functions ###
############################

    def spawn_process(self):
        """Spawns file serving process"""

        utils.kill_port(self.port)
        self._process = ProcessingPool()
        self._process.apipe(self._serve_file)
        logging.debug("Served RPKI File")

    def close(self):
        """Closes file process"""

        utils.kill_port(self.port, wait=False)
        self._process.close()
        self._process.terminate()
        self._process.join()
        self._process.clear()
        # changed to absolute path
        utils.delete_paths(os.path.join(self._dir, self.hosted_name))
        logging.debug("Closed RPKI File")

########################
### Helper Functions ###
########################

    def _gzip_file(self):
        """gzips the file for proper formatting in rpki validator"""

        with open(self.path, 'rb') as f_in, gzip.open(
                os.path.join(self._dir, self.hosted_name), 'wb') as f_out:

            f_out.writelines(f_in)

        utils.delete_paths(self.path)

    def _serve_file(self):
        """Makes a simple http server and serves a file in /tmp"""
        class Handler(http.server.SimpleHTTPRequestHandler):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

        # Changes directory to be in /tmp
        os.chdir(self._dir)
        # Serve the file on port 8000
        socketserver.TCPServer(("", RPKI_File.port), Handler).serve_forever()
Code Example #21
def compute_DTW_to_each_drone(drones_df_ls,
                              win_size,
                              no_sensors_cols,
                              per_series=False,
                              process_gps=True,
                              use_scaler=True):
    print('Start compute DTW')

    dataset = pd.concat(drones_df_ls)
    dataset = dataset.sort_values(['iter', 'update_step',
                                   'drone']).reset_index(drop=True)
    drones = dataset.drone.unique()
    numOfDrones = len(drones)

    start = time.time()
    # iter = '0simple'
    # dataset_iteri = dataset.loc[dataset['iter'] == iter, :]
    iters = dataset.iter.unique()

    # create empty df for results

    # iterate over iterations
    def compute_DTW_on_iter(dataset,
                            iter,
                            numOfDrones,
                            drones,
                            per_series=True):
        print('iter: ', iter)
        dtw_results_dict = {
            'iter': [],
            'update_step': [],
            'drone': [],
            'comparison_drone': [],
            'DTW_dist': []
        }
        # print('iter: ',iter )
        dataset_iter = dataset.loc[dataset['iter'] == iter, :]
        # cut the df by current update step-win size
        update_step_ls = dataset_iter.update_step.unique()
        # num of features (all columns minus the no-sensor columns and the label)
        num_of_features = dataset_iter.shape[1] - len(no_sensors_cols +
                                                      ['label'])
        # iterate over time steps
        for update_step in update_step_ls:
            current_seq = dataset_iter.loc[
                (dataset_iter['update_step'] <= update_step)
                & (dataset_iter['update_step'] > (update_step - win_size))]
            # iterate over drones
            for droneIidx in range(numOfDrones):
                currentDrone = drones[droneIidx]
                currentDroneDf = current_seq.loc[current_seq.drone ==
                                                 currentDrone, :]
                # drop irrelevant cols and convert to numpy
                currentDroneNp = currentDroneDf.drop(
                    no_sensors_cols + ['label'], 1).to_numpy()
                if use_scaler:
                    scaled_currentDroneNp = StandardScaler().fit_transform(
                        currentDroneNp)
                else:
                    scaled_currentDroneNp = currentDroneNp
                for droneJidx in range(numOfDrones):
                    # don't compare a drone to itself
                    if (droneIidx >= droneJidx): continue
                    # print(droneIidx, droneJidx)
                    otherDrone = drones[droneJidx]
                    otherDroneDf = current_seq.loc[current_seq.drone ==
                                                   otherDrone, :]
                    otherDroneNp = otherDroneDf.drop(
                        no_sensors_cols + ['label'], 1).to_numpy()
                    if use_scaler:
                        scaled_otherDroneNp = StandardScaler().fit_transform(
                            otherDroneNp)
                    else:
                        scaled_otherDroneNp = otherDroneNp
                    """compute DTW"""

                    if per_series:  # compute between each pair of series, return list

                        dist = [
                            dtw_path(scaled_currentDroneNp[:, i],
                                     scaled_otherDroneNp[:, i])[1]
                            for i in range(num_of_features)
                        ]
                        dist = np.array(dist)
                    else:
                        # path, dist = dtw_path(scaled_currentDroneNp, scaled_otherDroneNp)
                        path = ''
                        dist = dtw(scaled_currentDroneNp,
                                   scaled_otherDroneNp,
                                   window_type="sakoechiba",
                                   window_args={
                                       'window_size': 60
                                   }).distance
                    # print('Iter {} updatestep {} DroneI {} DroneJ {} DTW {}'.format(iter,update_step,currentDrone, otherDrone, dist))
                    # save results of current drone
                    dtw_results_dict['iter'].append(iter)
                    dtw_results_dict['update_step'].append(update_step)
                    dtw_results_dict['drone'].append(currentDrone)
                    dtw_results_dict['comparison_drone'].append(otherDrone)
                    dtw_results_dict['DTW_dist'].append(
                        dist)  # ; dtw_results_dict['DTW_path'].append(path)
                    # save results of other drone
                    dtw_results_dict['iter'].append(iter)
                    dtw_results_dict['update_step'].append(update_step)
                    dtw_results_dict['drone'].append(otherDrone)
                    dtw_results_dict['comparison_drone'].append(currentDrone)
                    dtw_results_dict['DTW_dist'].append(
                        dist)  # ; dtw_results_dict['DTW_path'].append(path)

        print('iter done: ', iter)
        return dtw_results_dict

    workers = multiprocessing.cpu_count()
    print('Number of workers: ', workers)
    workers = np.min([workers, len(iters)])
    pool = ProcessingPool(workers)
    list_of_iters_dict = list(
        pool.map(
            lambda iter: compute_DTW_on_iter(dataset, iter, numOfDrones,
                                             drones, per_series), iters))
    pool.close()
    pool.join()
    pool.terminate()
    pool.clear()
    # from list of dicts to one dict
    dtw_results_dict = {
        'iter': [],
        'update_step': [],
        'drone': [],
        'comparison_drone': [],
        'DTW_dist': []
    }
    [
        dtw_results_dict[result_key].append(value)
        for dict in list_of_iters_dict for result_key, list in dict.items()
        for value in list
    ]

    print('time took: ', time.time() - start)

    dtw_results_df = pd.DataFrame.from_dict(dtw_results_dict)
    dtw_results_df = dtw_results_df.sort_values(
        ['iter', 'update_step', 'drone']).reset_index(drop=True)

    dtw_results_df_after_removal_ls = []

    return dtw_results_df
Code Example #22
"""
Example of a script for converting two video files in parallel

Additional dependencies:
    - pathos

Author(s) : Fabrice Zaoui (EDF R&D LNHE)

Copyright EDF 2018
"""
from sonaris import Sonaris
from pathos.multiprocessing import ProcessingPool as Pool


def run(video_list):
    video_list.convert()


# ARIS files to convert and associated AVI file
conversion_1 = Sonaris('video_test.aris', '2014_1.avi')
conversion_2 = Sonaris('video_test.aris', '2014_2.avi')
# list Sonaris jobs
tab = [conversion_1, conversion_2]
# use a number of processors (ideally one proc. per ARIS file)
pool = Pool(nodes=2)
# launch conversion
pool.map(run, tab)
# close pool
pool.terminate()
pool.join()
Code Example #23
        def main():
            parser = argparse.ArgumentParser()
            parser.add_argument(
                'source_path',
                help="Path to the video or audio file to subtitle",
                nargs='?')
            parser.add_argument(
                '-C',
                '--concurrency',
                help="Number of concurrent API requests to make",
                type=int,
                default=10)
            parser.add_argument(
                '-o',
                '--output',
                help=
                "Output path for subtitles (by default, subtitles are saved in \ the same directory and name as the source path)"
            )
            parser.add_argument('-F',
                                '--format',
                                help="Destination subtitle format",
                                default="srt")
            parser.add_argument('-S',
                                '--src-language',
                                help="Language spoken in source file",
                                default="en")
            parser.add_argument('-D',
                                '--dst-language',
                                help="Desired language for the subtitles",
                                default="en")
            parser.add_argument(
                '-K',
                '--api-key',
                help=
                "The Google Translate API key to be used. (Required for subtitle translation)"
            )
            parser.add_argument('--list-formats',
                                help="List all available subtitle formats",
                                action='store_true')
            parser.add_argument(
                '--list-languages',
                help="List all available source/destination languages",
                action='store_true')

            args = parser.parse_args()
            print args

            if (os.name == "posix"):
                args.source_path = str(self.filename)
            else:
                args.source_path = (str(self.filename)).replace("/", "\\")
                pas = (args.source_path).replace("/", "\\")
                args.source_path = pas
                print " Printing pas >>>", pas
            print args

            path = args.source_path[:-3]
            srt_path = path + "srt"

            if args.list_formats:
                print("List of formats:")
                for subtitle_format in FORMATTERS.keys():
                    print("{format}".format(format=subtitle_format))
                return 0

            if args.list_languages:
                print("List of all languages:")
                for code, language in sorted(LANGUAGE_CODES.items()):
                    print("{code}\t{language}".format(code=code,
                                                      language=language))
                return 0

            if args.format not in FORMATTERS.keys():
                print(
                    "Subtitle format not supported. Run with --list-formats to see all supported formats."
                )
                return 1

            if args.src_language not in LANGUAGE_CODES.keys():
                print(
                    "Source language not supported. Run with --list-languages to see all supported languages."
                )
                return 1

            if args.dst_language not in LANGUAGE_CODES.keys():
                print(
                    "Destination language not supported. Run with --list-languages to see all supported languages."
                )
                return 1

            if not args.source_path:
                print("Error: You need to specify a source path.")
                return 1

            audio_filename, audio_rate = extract_audio(args.source_path)

            regions = find_speech_regions(audio_filename)
            pool = ProcessingPool(args.concurrency)
            converter = FLACConverter(source_path=audio_filename)
            recognizer = SpeechRecognizer(language=args.src_language,
                                          rate=audio_rate,
                                          api_key=GOOGLE_SPEECH_API_KEY)

            transcripts = []
            if regions:
                try:
                    widgets = [
                        "Converting speech regions to FLAC files: ",
                        Percentage(), ' ',
                        Bar(), ' ',
                        ETA()
                    ]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()
                    extracted_regions = []
                    for i, extracted_region in enumerate(
                            pool.imap(converter, regions)):
                        extracted_regions.append(extracted_region)
                        pbar.update(i)
                        self.progress1.setValue(i)
                    pbar.finish()

                    widgets = [
                        "Performing speech recognition: ",
                        Percentage(), ' ',
                        Bar(), ' ',
                        ETA()
                    ]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()

                    for i, transcript in enumerate(
                            pool.imap(recognizer, extracted_regions)):
                        transcripts.append(transcript)
                        pbar.update(i)
                        self.progress2.setValue(i)
                    pbar.finish()
                    QMessageBox.about(self, "Subtitles created",
                                      "Created at " + srt_path)
                    if not is_same_language(args.src_language,
                                            args.dst_language):
                        if args.api_key:
                            google_translate_api_key = args.api_key
                            translator = Translator(args.dst_language,
                                                    google_translate_api_key,
                                                    dst=args.dst_language,
                                                    src=args.src_language)
                            prompt = "Translating from {0} to {1}: ".format(
                                args.src_language, args.dst_language)
                            widgets = [
                                prompt,
                                Percentage(), ' ',
                                Bar(), ' ',
                                ETA()
                            ]
                            pbar = ProgressBar(widgets=widgets,
                                               maxval=len(regions)).start()
                            translated_transcripts = []
                            for i, transcript in enumerate(
                                    pool.imap(translator, transcripts)):
                                translated_transcripts.append(transcript)
                                pbar.update(i)
                                self.progress2.setValue(i)
                            pbar.finish()
                            transcripts = translated_transcripts
                        else:
                            print "Error: Subtitle translation requires specified Google Translate API key. \See --help for further information."
                            return 1

                except KeyboardInterrupt:
                    pbar.finish()
                    pool.terminate()
                    pool.join()
                    print "Cancelling transcription"
                    return 1

            timed_subtitles = [(r, t) for r, t in zip(regions, transcripts)
                               if t]
            formatter = FORMATTERS.get(args.format)
            formatted_subtitles = formatter(timed_subtitles)

            dest = args.output

            if not dest:
                base, ext = os.path.splitext(args.source_path)
                dest = "{base}.{format}".format(base=base, format=args.format)

            with open(dest, 'wb') as f:
                f.write(formatted_subtitles.encode("utf-8"))

            print "Subtitles file created at {}".format(dest)

            os.remove(audio_filename)

            return 0
Code Example #24
class RPKI_Validator_Wrapper:
    """This class gets validity data from ripe"""

    __slots__ = ['total_prefix_origin_pairs', "_process", "_table_input",
                 "_rpki_file"]

    # Sorry for the crazy naming scheme, must be done to avoid
    # having install file names in multiple locations
    temp_install_path = "/tmp/temp_rpki_validator_install"
    rpki_package_path = RPKI_PACKAGE_PATH
    rpki_run_name = RPKI_RUN_NAME
    rpki_run_path = RPKI_PACKAGE_PATH + RPKI_RUN_NAME
    rpki_db_paths = [RPKI_PACKAGE_PATH + x for x in ["db/", "rsync/"]]
    port = 8080
    api_url = "http://[::1]:8080/api/"

    def __init__(self, **kwargs):
        config_logging(kwargs.get("stream_level", logging.INFO),
                       kwargs.get("section"))
        self._table_input = kwargs.get("table_input", "mrt_rpki")
        if not os.path.exists(self.rpki_package_path):
            logging.warning("Looks like validator is not installed")
            logging.warning("Installing validator now")
            RPKI_Validator_Wrapper.install(**kwargs)

#################################
### Context Manager Functions ###
#################################

    def __enter__(self):
        """Runs the RPKI Validator"""

        utils.kill_port(self.port)
        # Must remove these to ensure a clean run
        utils.clean_paths(self.rpki_db_paths)
        cmds = [f"cd {self.rpki_package_path}",
                f"chown -R root:root {self.rpki_package_path}"]
        utils.run_cmds(cmds)
        # Writes validator file and serves it
        # Can't use context manager here since it returns it
        self._rpki_file = RPKI_File(self._table_input)
        self._rpki_file.spawn_process()
        self._process = ProcessingPool()
        self._process.apipe(self._start_validator)
        self.total_prefix_origin_pairs = self._rpki_file.total_lines
        return self

    def __exit__(self, type, value, traceback):
        """Closes RPKI Validator"""

        self._process.close()
        self._process.terminate()
        self._process.join()
        self._process.clear()
        utils.kill_port(self.port, wait=False)
        logging.debug("Closed rpki validator")
        self._rpki_file.close()

    def _start_validator(self):
        """Sends start cmd to RPKI Validator"""

        logging.info("Starting RPKI Validator")
        utils.run_cmds((f"cd {self.rpki_package_path} && "
                        f"./{self.rpki_run_name}"))

#########################
### Wrapper Functions ###
#########################

    def load_trust_anchors(self):
        """Loads all trust anchors"""

        utils.write_to_stdout(f"{datetime.now()}: Loading RPKI Validator\n",
                              logging.root.level)
        time.sleep(60)
        while self._get_validation_status() is False:
            time.sleep(10)
            utils.write_to_stdout(".", logging.root.level)
        utils.write_to_stdout("\n", logging.root.level)
        self._wait(30, "Waiting for upload to bgp preview")

    def make_query(self, api_endpoint: str, data=True) -> dict:
        """Makes query to api of rpki validator"""

        result = utils.get_json(os.path.join(self.api_url, api_endpoint),
                                RPKI_Validator_Wrapper.get_headers())
        return result["data"] if data else result

    def get_validity_data(self) -> dict:
        """Gets the data from ripe and formats it for csv insertions"""

        logging.info("Getting data from ripe")
        assert self.total_prefix_origin_pairs < 10000000, "page size too small"
        # Then we get the data from the ripe RPKI validator
        # Todo for later, change 10mil to be total count
        return self.make_query("bgp/?pageSize=10000000")

########################
### Helper Functions ###
########################

    def _wait(self, time_to_sleep: int, msg: str):
        """logs a message and waits"""

        logging.debug(msg)
        if logging.root.level == logging.INFO:
            # Number of times per second to update tqdm
            divisor = 100
            for _ in trange(time_to_sleep * divisor,
                            desc=msg):
                time.sleep(1 / divisor)

    def _get_validation_status(self) -> bool:
        """Returns row count of json object for waiting"""

        try:
            for x in self.make_query("trust-anchors/statuses"):
                if x["completedValidation"] is False:
                    # If anything has not been validated return false
                    return False
            # All are validated. Return true
            return True
        except urllib.error.URLError as e:
            self._wait(60, "Connection was refused")
            return False

######################
### Static methods ###
######################

    @staticmethod
    def get_validity_dict() -> dict:
        """Returns the validity dict for the RPKI Validator to decode results

        I could have this as a class attribute but too messy I think.
        """

        return {"VALID": ROA_Validity.VALID.value,
                "UNKNOWN": ROA_Validity.UNKNOWN.value,
                "INVALID_LENGTH": ROA_Validity.INVALID_BY_LENGTH.value,
                "INVALID_ASN": ROA_Validity.INVALID_BY_ORIGIN.value}

    @staticmethod
    def get_headers() -> dict:
        """Gets the headers for all url queries to the validator"""

        return {"Connection": "keep-alive",
                "Cache-Control": "max-age=0",
                "Upgrade-Insecure-Requests": 1,
                "User-Agent": ("Mozilla/5.0 (X11; Linux x86_64)"
                               " AppleWebKit/537.36 (KHTML, like Gecko) "
                               "Chrome/73.0.3683.86 Safari/537.36"),
                "Accept": ("text/html,application/xhtml+xml,"
                           "application/xml;q=0.9,image/webp,"
                           "image/apng,*/*;q=0.8,"
                           "application/signed-exchange;v=b3"),
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "en-US,en;q=0.9"}

#########################
### Install Functions ###
#########################

    @staticmethod
    def install(**kwargs):
        """Installs RPKI validator with our configs.

        This might break in the future, but we need to do it this way
        for now to be able to do what we want with our own prefix origin
        table.
        """

        config_logging(kwargs.get("stream_level", logging.DEBUG),
                       kwargs.get("section"))
        utils.delete_paths([RPKI_Validator_Wrapper.rpki_package_path,
                            RPKI_Validator_Wrapper.temp_install_path])

        RPKI_Validator_Wrapper._download_validator()
        RPKI_Validator_Wrapper._change_file_hosted_location()
        path = RPKI_Validator_Wrapper._change_server_address()
        RPKI_Validator_Wrapper._config_absolute_paths(path)

    @staticmethod
    def _download_validator():
        """Downloads validator into proper location"""

        rpki_url = ("https://ftp.ripe.net/tools/rpki/validator3/beta/generic/"
                    "rpki-validator-3-latest-dist.tar.gz")
        arin_tal = ("https://www.arin.net/resources/manage/rpki/"
                    "arin-ripevalidator.tal")
        # This is the java version they use so we will use it
        cmds = [f"mkdir {RPKI_Validator_Wrapper.temp_install_path}",
                f"cd {RPKI_Validator_Wrapper.temp_install_path}",
                "sudo apt-get -y install openjdk-8-jre",
                f"wget {rpki_url}",
                "tar -xvf rpki-validator-3-latest-dist.tar.gz",
                "rm -rf rpki-validator-3-latest-dist.tar.gz",
                f"mv rpki-validator* {RPKI_Validator_Wrapper.rpki_package_path}",
                f"cd {RPKI_Validator_Wrapper.rpki_package_path}",
                "cd preconfigured-tals",
                f"wget {arin_tal}"]
        utils.run_cmds(cmds)

    @staticmethod
    def _change_file_hosted_location():
        """Changes location of input ann for bgp preview file"""

        # Changes where the file is hosted
        path = (f"{RPKI_Validator_Wrapper.rpki_package_path}conf"
                "/application-defaults.properties")
        prepend = "rpki.validator.bgp.ris.dump.urls="
        replace = ("https://www.ris.ripe.net/dumps/riswhoisdump.IPv4.gz,"
                   "https://www.ris.ripe.net/dumps/riswhoisdump.IPv6.gz")
        replace_with = (f"http://localhost:{RPKI_File.port}"
                        f"/{RPKI_File.hosted_name}")
        utils.replace_line(path, prepend, replace, replace_with)

    @staticmethod
    def _change_server_address():
        """Prob because of a proxy, but on our server this is necessary"""

        # Changes the server address
        path = (f"{RPKI_Validator_Wrapper.rpki_package_path}conf"
                "/application.properties")
        prepend = "server.address="
        replace = "localhost"
        replace_with = "0.0.0.0"
        utils.replace_line(path, prepend, replace, replace_with)
        return path

    @staticmethod
    def _config_absolute_paths(path):
        """Configure rpki validator to run off absolute paths

        This is necessary due to script being called from elsewhere
        In other words not from inside the RPKI dir.
        """

        # Since I am calling the script from elsewhere these must be
        # absolute paths
        prepend = "rpki.validator.data.path="
        replace = "."
        # Must remove trailing backslash at the end
        replace_with = RPKI_Validator_Wrapper.rpki_package_path[:-1]
        utils.replace_line(path, prepend, replace, replace_with)

        prepend = "rpki.validator.preconfigured.trust.anchors.directory="
        replace = "./preconfigured-tals"
        replace_with = (f"{RPKI_Validator_Wrapper.rpki_package_path}"
                        "preconfigured-tals")
        utils.replace_line(path, prepend, replace, replace_with)

        prepend = "rpki.validator.rsync.local.storage.directory="
        replace = "./rsync"
        replace_with = f"{RPKI_Validator_Wrapper.rpki_package_path}rsync"
        utils.replace_line(path, prepend, replace, replace_with)