Example #1
0
    def _yield_koji_relationships_from_build(self, koji_url, build_id, rpms=None):
        """Yield ``(parent, relationship_type, child)`` tuples for a koji build.

        The install-time requirements of every rpm in the build are looked up
        through ``pdcupdater.services.koji_yield_rpm_requires`` on a thread
        pool of ``self.io_threads`` workers.  One ``'RPMRequires'``
        relationship is yielded per distinct (parent, required name) pair.

        :param koji_url: koji URL used to list the build's rpms when ``rpms``
            is not supplied.
        :param build_id: id of the koji build.
        :param rpms: optional rpm filenames; when empty or None they are
            fetched with ``koji_rpms_from_build``.
        """
        # Get all RPMs for a build... only if they're not supplied.
        if not rpms:
            _build, rpms = pdcupdater.services.koji_rpms_from_build(
                koji_url, build_id)

        def _get_requirements(filename):
            log.debug("Looking up installtime deps in koji for %r" % filename)
            # NOTE(review): this queries self.koji_url rather than the
            # koji_url argument -- confirm that is intended.
            return filename, pdcupdater.services.koji_yield_rpm_requires(
                self.koji_url, filename)

        # Look up the *install time* deps, in parallel.  Lots of I/O wait..
        pool = multiprocessing.pool.ThreadPool(self.io_threads)
        try:
            lookups = pool.map(_get_requirements, rpms)
        finally:
            # Always release the worker threads, even if a lookup raised.
            pool.close()
            pool.join()

        results = collections.defaultdict(set)
        for filename, requirements in lookups:
            # The parent name is the filename minus its last two
            # '-'-separated fields.
            parent = filename.rsplit('-', 2)[0]

            for name, _qualifier, _version in requirements:
                # XXX - we're dropping any >= or <= information here, which is
                # OK for now.  All we need to know is that there is a
                # dependency.
                results[parent].add(('RPMRequires', name,))

        for parent in results:
            for relationship_type, child in results[parent]:
                yield parent, relationship_type, child
Example #2
0
def pass_data_to_search(symbol,path,start_time_seconds,end_time_seconds,date,time_interval,tt,code_path):
    """Run ``mapper`` over one data file using a pool of 40 processes.

    The input file is ``path + 'b' + date + '.l.bz2'``.  Its on-disk size is
    divided by 69 to get a row count (69 is presumably the byte length of one
    row -- TODO confirm), and that count is divided by the number of
    processes to get the slot size handed to every ``mapper`` call.

    The async results are not collected; the function returns None once all
    workers have finished.
    """
    file_name = path + 'b' + date + '.l.bz2'

    worker_count = 40
    total_rows = os.path.getsize(file_name) / 69
    slots = total_rows / worker_count

    # One mapper task per slot index; each task receives its own index.
    pool = multiprocessing.Pool(worker_count)
    for slot_index in range(worker_count):
        task_args = (slot_index, slots, worker_count, symbol,
                     start_time_seconds, end_time_seconds, date,
                     time_interval, file_name, tt, code_path)
        pool.apply_async(mapper, args=task_args)

    pool.close()
    pool.join()
Example #3
0
def _map_parallel(function, args, n_jobs):
    """Map ``function`` over ``args``, in a process pool when possible.

    Equivalent to ``multiprocessing.Pool(processes=n_jobs).map`` with the
    availability checks used by joblib.  Falls back to a serial ``map`` when:

    * the ``JOBLIB_MULTIPROCESSING`` environment variable is ``0``,
    * ``multiprocessing`` cannot be imported,
    * a semaphore cannot be created (a warning is issued), or
    * ``n_jobs`` is 0 or 1.

    :param function: callable applied to every element (must be picklable
        for the parallel path).
    :param args: iterable of inputs.
    :param n_jobs: number of worker processes; ``-1`` lets the pool pick its
        own default.
    :returns: list of results, in input order.
    """
    # Following the error checking found in joblib
    mp = None
    if int(os.environ.get('JOBLIB_MULTIPROCESSING', 1)):
        try:
            import multiprocessing as mp
            import multiprocessing.pool  # noqa: F401
        except ImportError:
            mp = None
    # 2nd stage: validate that locking is available on the system and
    #            issue a warning if not
    if mp is not None:
        try:
            _sem = mp.Semaphore()
            del _sem  # cleanup
        except (ImportError, OSError) as e:
            mp = None
            warnings.warn('%s. _map_parallel will operate in serial mode' % (e,))
    if mp is not None and int(n_jobs) not in (0, 1):
        if n_jobs == -1:
            n_jobs = None
        pool = mp.Pool(processes=n_jobs)
        try:
            map_result = pool.map(function, args)
        finally:
            # Release the workers even when a task raised.
            pool.close()
            pool.join()
    else:
        map_result = list(map(function, args))
    return map_result
    def _listArtifacts(self, urls, gavs):
        """
        Look up every GAV in the given repositories using a thread pool.

        For each GAV the repository URLs are tried in order and the first one
        that contains the artifact wins.  A warning is logged for GAVs that
        are not found in any repository.

        :param urls: repository URLs where the given GAVs can be located
        :param gavs: List of GAVs
        :returns: Dictionary keyed by MavenArtifact whose value is an
                  ArtifactSpec built from the URL where it was found.
        """
        found = {}

        def locate(gav):
            artifact = MavenArtifact.createFromGAV(gav)
            for url in urls:
                if not maven_repo_util.gavExists(url, artifact):
                    continue
                # Workers write to the shared dict; each artifact has its own key.
                types = [ArtifactType(artifact.artifactType, True, set(['']))]
                found[artifact] = ArtifactSpec(url, types)
                break
            else:
                logging.warning('Artifact %s not found in any url!', artifact)

        pool = ThreadPool(maven_repo_util.MAX_THREADS)
        for gav in gavs:
            pool.apply_async(locate, [gav])

        # Close the pool and wait for the workers to finish
        pool.close()
        pool.join()

        return found
Example #5
0
def superheadhunter(filelist, keys, defaults=None, **kw):
    '''Headhunter looped over a list of files.

    Runs ``headhunter`` on every file in a process pool, flattens the
    per-file results and passes them to ``merger``.

    filelist : iterable of files handed one by one to ``headhunter``
    keys     : passed to ``headhunter`` and ``merger``
    defaults : passed to ``merger``; a fresh empty list when omitted
    Keywords :
        Nchunks     -- forwarded to ``headhunter`` (default 25)
        return_type -- forwarded to ``merger`` (default 'list')
        with_parent -- accepted but ignored: ``headhunter`` is always called
                       with ``with_parent=False``
    '''
    #TODO: BENCHMARK! Nchunks, Nfiles
    #TODO: OPTIMIZE?
    if defaults is None:
        # A fresh list per call; a shared mutable default would leak state
        # between calls if merger ever modified it.
        defaults = []

    Nchunks     = kw.get( 'Nchunks', 25 )
    return_type = kw.get( 'return_type', 'list' )

    hunt = functools.partial(headhunter,
                             keys=keys,
                             Nchunks=Nchunks,
                             return_type='raw',
                             with_parent=False )

    pool = Pool()
    try:
        raw = pool.map( hunt, filelist )
    finally:
        # Release the worker processes even when a hunt raised.
        pool.close()
        pool.join()

    #Flatten the twice nested list of string matches
    results = []
    for r in raw:
        results.extend(r)

    return merger(results, keys, defaults, return_type)
Example #6
0
def _map_parallel(function, args, n_jobs):
    """Map ``function`` over ``args``, in a process pool when possible.

    Equivalent to ``multiprocessing.Pool(processes=n_jobs).map`` with the
    availability checks used by joblib.  Falls back to a serial ``map`` when:

    * the ``JOBLIB_MULTIPROCESSING`` environment variable is ``0``,
    * ``multiprocessing`` cannot be imported,
    * running on Windows under Python 2 (``n_jobs`` is forced to 1),
    * a semaphore cannot be created (a warning is logged), or
    * ``n_jobs`` is 0 or 1.

    :param function: callable applied to every element (must be picklable
        for the parallel path).
    :param args: iterable of inputs.
    :param n_jobs: number of worker processes; ``-1`` lets the pool pick its
        own default.
    :returns: list of results, in input order.
    """
    # Following the error checking found in joblib
    mp = None
    if int(os.environ.get('JOBLIB_MULTIPROCESSING', 1)):
        try:
            import multiprocessing as mp
            import multiprocessing.pool  # noqa: F401
        except ImportError:
            mp = None
        if sys.platform.startswith("win") and PY2:
            msg = "Multiprocessing is not supported on Windows with Python 2.X. Setting n_jobs=1"
            logger.warning(msg)
            n_jobs = 1
    # 2nd stage: validate that locking is available on the system and
    #            issue a warning if not
    if mp is not None:
        try:
            _sem = mp.Semaphore()
            del _sem  # cleanup
        except (ImportError, OSError) as e:
            mp = None
            logger.warning('{}. _map_parallel will operate in serial mode'.format(e))
    if mp is not None and int(n_jobs) not in (0, 1):
        if n_jobs == -1:
            n_jobs = None
        # Create the pool outside the try block: if construction fails there
        # is nothing to close, and the original error must not be masked by
        # a NameError on an unbound ``pool`` in the finally clause.
        pool = mp.Pool(processes=n_jobs)
        try:
            map_result = pool.map(function, args)
        finally:
            pool.close()
            pool.join()
    else:
        map_result = list(map(function, args))
    return map_result
Example #7
0
def ScopedPool(*args, **kwargs):
  """Generator that yields one multiprocessing pool and always cleans it up.

  Presumably wrapped as a context manager by a decorator outside this view.
  On a clean exit the pool is closed; if the consumer raises, the pool is
  terminated and the exception re-raised.  The pool is joined in both cases.

  *args - Arguments to multiprocessing.pool

  Kwargs:
    kind ('threads', 'procs') - The type of underlying coprocess to use.
    **etc - Arguments to multiprocessing.pool
  """
  use_threads = kwargs.pop('kind', None) == 'threads'
  if use_threads:
    pool = multiprocessing.pool.ThreadPool(*args, **kwargs)
  else:
    # Wrap any caller-supplied initializer so _ScopedPool_initer runs first
    # and receives the original initializer and its arguments.
    user_init = kwargs.get('initializer')
    user_initargs = kwargs.get('initargs', ())
    kwargs.update(initializer=_ScopedPool_initer,
                  initargs=(user_init, user_initargs))
    pool = multiprocessing.pool.Pool(*args, **kwargs)

  try:
    yield pool
    pool.close()
  except:
    # Deliberately catches everything (including KeyboardInterrupt) so the
    # workers are killed before the exception propagates.
    pool.terminate()
    raise
  finally:
    pool.join()
Example #8
0
def slippy_test(test_options, width=TILE_WIDTH, height=TILE_HEIGHT, tile_factor=TILE_FACTOR):
	"""Request a screenful of map tiles in parallel and time the fetch.

	test_options is a dict with keys 'z', 'x', 'y' (tile coordinates of the
	screen centre) and 'url_prefix'.  Tile URLs of the form
	``<url_prefix>z/x/y.png`` are built for the grid around (x, y) and
	fetched with ``slippy_test_helper`` on ``tile_factor`` processes.

	Returns ``'<z>,OK,<seconds>'`` on success, or ``'-1,ERROR,nan'`` when any
	helper call returned False.
	"""
	#assume each screen is a 10x5 grid of tiles
	#this approximately the OTM map size at full screen
	#at my desk
	z = test_options['z']
	x = test_options['x']
	y = test_options['y']
	url_prefix = test_options['url_prefix']

	# Floor division keeps the range() bounds integral on Python 3 as well;
	# for ints it is identical to Python 2's "/".
	half_width = width // 2
	half_height = height // 2

	tiles_to_request = [
		url_prefix + '%d/%d/%d.png' % (z, x_iter, y_iter)
		for x_iter in range(x - half_width, x + half_width - 1)
		for y_iter in range(y - half_height, y + half_height - 1)
	]

	pool = multiprocessing.Pool(processes=tile_factor)
	try:
		# Only the map call itself is timed, not pool start-up or shutdown.
		start_time = time.time()
		results = pool.map(slippy_test_helper, tiles_to_request)
		end_time = time.time()
	finally:
		pool.close()
		pool.join()
	sys.stderr.write('.')

	if False in results:
		return '%d,ERROR,%f' % (-1, float('nan'))
	return '%d,OK,' % z + str(end_time - start_time)
Example #9
0
def parallel_compile(self, sources, output_dir=None, macros=None,
                     include_dirs=None, debug=0, extra_preargs=None,
                     extra_postargs=None, depends=None):
    """Compile ``sources``, optionally on several threads.

    Replacement compile function meant to be monkey patched onto an existing
    compiler instance.  The number of threads is read from the module-level
    ``glob_use_njobs``; a value of 1 compiles sequentially.

    Returns the list of all object filenames, not only those built here.
    """
    import multiprocessing.pool

    # Same preparation steps as the regular compile function.
    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
        output_dir, macros, include_dirs, sources, depends, extra_postargs)
    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)

    def compile_one(obj):
        # Objects missing from the build map are silently skipped.
        try:
            src, ext = build[obj]
        except KeyError:
            pass
        else:
            self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)

    # Set by fix_compiler
    global glob_use_njobs
    if glob_use_njobs != 1:
        # NOTE(review): the original comment said ThreadPool is used "since
        # the objects are picklable"; ThreadPool is normally chosen when they
        # are *not* picklable -- confirm the intended wording.
        workers = multiprocessing.pool.ThreadPool(glob_use_njobs)
        workers.map(compile_one, objects)
        workers.close()
        workers.join()
    else:
        # This is equivalent to the regular compile function.
        for obj in objects:
            compile_one(obj)

    # Return *all* object filenames, not just the ones we just built.
    return objects
Example #10
0
def threshold(X, e, a, b, k, num_workers, metric):
    """ Collect all threshold clusters (algorithm 7, lines 1-6), then refine them.

    ``get_thresholds`` is evaluated for every row index of X on a pool of
    ``num_workers`` processes.  The first element of each result is
    flattened into the list L, and elements 1 and 2 form the key/value pairs
    of the dictionary D; both are handed to ``refine``.

    :param X: Data matrix
    :param e: lower bound on fractional size of each cluster
    :param a: lower bound on fractional size of a set inside own cluster for which stability holds
    :param b: lower bound on fractional size of a set outside own cluster for which stability holds
    :param k: Number of clusters
    :param num_workers: Number of workers
    :param metric: metric is in the set {avg, min, max}
    :return: Whatever ``refine`` returns for the collected threshold clusters
    """
    print("Populating list with all threshold clusters with metric:", metric)
    began = time.time()
    n = len(X)
    minsize = int(e * n)
    worker = partial(get_thresholds, X, minsize, num_workers, metric)
    with Pool(num_workers) as pool:
        items = pool.map(worker, range(n))
        pool.close()
        pool.join()
    L = []
    for item in items:
        L.extend(item[0])
    D = {item[1]: item[2] for item in items}
    elapsed = time.time() - began
    print("Length of L = ", len(L))
    print("time = {0:.2f}s".format(elapsed))
    return refine(L, X, D, e, a, b, k, num_workers, metric)
Example #11
0
def download_junit(db, threads, client_class):
    """Download junit results for builds without them.

    The builds come from ``db.get_builds_missing_junit()``.  With
    ``threads > 1`` the downloads run on a thread pool whose workers are set
    up by ``mp_init_worker``; otherwise a single module-level
    ``WORKER_CLIENT`` is created and the builds are fetched one at a time.

    Each result is printed, passed through ``remove_system_out`` and stored
    with ``db.insert_build_junits``.  The database is committed every 100
    builds and once more at the end.
    """
    builds_to_grab = db.get_builds_missing_junit()
    pool = None
    if threads > 1:
        pool = multiprocessing.pool.ThreadPool(
            threads, mp_init_worker, ('', {}, client_class, False))
        test_iterator = pool.imap_unordered(
            get_junits, builds_to_grab)
    else:
        global WORKER_CLIENT  # pylint: disable=global-statement
        WORKER_CLIENT = client_class('', {})
        test_iterator = (
            get_junits(build_path) for build_path in builds_to_grab)
    try:
        for n, (build_id, build_path, junits) in enumerate(test_iterator, 1):
            print('%d/%d' % (n, len(builds_to_grab)),
                  build_path, len(junits), len(''.join(junits.values())))
            junits = {k: remove_system_out(v) for k, v in junits.iteritems()}

            db.insert_build_junits(build_id, junits)
            if n % 100 == 0:
                db.commit()
        db.commit()
    except BaseException:
        # Stop outstanding downloads instead of waiting for all of them
        # before the error can propagate.
        if pool:
            pool.terminate()
        raise
    finally:
        if pool:
            pool.close()
            pool.join()
Example #12
0
    def count_intersect(self, threshold, frequency=True):
        """Count intersections between every reference and every query set.

        Resets ``self.counts``, ``self.rlen``, ``self.qlen`` and
        ``self.nalist`` (and ``self.frequency`` when ``frequency`` is truthy),
        then, for every group key of ``self.groupedreference`` and every
        reference ``r`` in that group, runs ``mp_count_intersect`` in a
        process pool over all queries of the same group whose name differs
        from ``r.name``.

        :param threshold: passed through unchanged to ``mp_count_intersect``.
        :param frequency: when truthy, ``self.frequency`` is re-initialised
            before counting.

        NOTE(review): a new pool is created for every reference inside the
        inner loop.
        NOTE(review): ``rlen`` is only assigned when ``self.mode_count`` is
        "bp" or "count".
        NOTE(review): ``self.frequency`` is read and written below even when
        ``frequency`` is falsy, so it must already exist in that case --
        confirm against callers.
        """

        self.counts = OrderedDict()
        self.rlen, self.qlen = {}, {}
        self.nalist = []

        if frequency:
            self.frequency = OrderedDict()

        # if self.mode_count == "bp":
        #    print2(self.parameter, "\n{0}\t{1}\t{2}\t{3}\t{4}".format("Reference","Length(bp)", "Query", "Length(bp)", "Length of Intersection(bp)"))
        # elif self.mode_count == "count":
        #    print2(self.parameter, "\n{0}\t{1}\t{2}\t{3}\t{4}".format("Reference","sequence_number", "Query", "sequence_number", "Number of Intersection"))

        for ty in self.groupedreference.keys():
            self.counts[ty] = OrderedDict()
            self.rlen[ty], self.qlen[ty] = OrderedDict(), OrderedDict()
            if frequency:
                self.frequency[ty] = OrderedDict()

            for r in self.groupedreference[ty]:
                # References with zero coverage but at least one entry are
                # recorded in nalist and skipped.
                if r.total_coverage() == 0 and len(r) > 0:
                    self.nalist.append(r.name)
                    continue
                else:
                    self.counts[ty][r.name] = OrderedDict()
                    # Reference "length" is total coverage in bp mode, or the
                    # number of entries in count mode.
                    if self.mode_count == "bp":
                        rlen = r.total_coverage()
                    elif self.mode_count == "count":
                        rlen = len(r)
                    self.rlen[ty][r.name] = rlen

                    # One worker input per query; a query with the same name
                    # as the reference is not compared against it.
                    mp_input = []
                    for q in self.groupedquery[ty]:
                        if r.name == q.name:
                            continue
                        else:
                            mp_input.append([q, self.nalist, self.mode_count, self.qlen, threshold,
                                             self.counts, frequency, self.frequency, ty, r])
                    # q, nalist, mode_count, qlen_dict, threshold, counts, frequency, self_frequency, ty, r
                    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count() - 1)
                    mp_output = pool.map(mp_count_intersect, mp_input)
                    pool.close()
                    pool.join()

                    # Each output is indexed positionally: [0] query name,
                    # [1] value appended to nalist when truthy, [2] query
                    # length, [3] the count record (its element [2] is stored
                    # as the frequency).
                    # qname, nalist, qlen_dict[ty][q.name], counts[ty][r.name][q.name], self_frequency[ty][q.name].append(c[2])
                    for output in mp_output:
                        if output[1]:
                            self.nalist.append(output[1])
                        else:
                            self.qlen[ty][output[0]] = output[2]
                            self.counts[ty][r.name][output[0]] = output[3]
                            # print(r.name)
                            # print(output[0])
                            # print(output[3])
                            # First write for this query name fails on the
                            # missing inner dict; the handler creates it.
                            # NOTE(review): the bare except also hides any
                            # other error raised by the first attempt.
                            try:
                                self.frequency[ty][output[0]][r.name] = output[3][2]
                            except:
                                self.frequency[ty][output[0]] = {}
                                self.frequency[ty][output[0]][r.name] = output[3][2]
Example #13
0
def _CompileDeps(aapt_path, dep_subdirs, temp_dir):
  """Compiles each dependency resource directory into a sorted partial zip.

  Runs ``<aapt_path>2 compile`` for every directory in dep_subdirs on a pool
  of 10 threads, writing into ``<temp_dir>/partials``.

  Returns the list of sorted partial zip paths, in dep_subdirs order.
  """
  partials_dir = os.path.join(temp_dir, 'partials')
  build_utils.MakeDirectory(partials_dir)
  # TODO(wnwen): Add '--no-crunch' once aapt2 forces 9-patch to be crunched.
  base_command = [aapt_path + '2', 'compile']

  def compile_partial(directory):
    dirname = os.path.basename(directory)
    partial_path = os.path.join(partials_dir, dirname + '.zip')
    build_utils.CheckOutput(
        base_command + ['--dir', directory, '-o', partial_path])

    # Sorting the files in the partial ensures deterministic output from the
    # aapt2 link step which uses order of files in the partial.
    sorted_partial_path = os.path.join(partials_dir, dirname + '.sorted.zip')
    _SortZip(partial_path, sorted_partial_path)
    return sorted_partial_path

  pool = multiprocessing.pool.ThreadPool(10)
  partials = pool.map(compile_partial, dep_subdirs)
  pool.close()
  pool.join()
  return partials
Example #14
0
def process_iteration(Ns, ps, landscape, config):
	"""Record statistics for one iteration and optionally draw every time step.

	For each time step (at most ``ps.max_time_steps`` of them) the population
	statistics are added to a single-iteration Point, which is then written
	to ``<output_dir>/results.txt`` as a one-point trajectory.  When
	``config.display`` is set, ``draw_population`` is run for every time step
	on a pool of ``config.num_processors`` processes.  When
	``config.save_time_steps`` is set, the populations are saved to
	``<output_dir>/populations.npz``.
	"""
	output_dir = config.output_dir + config.ext

	if config.background_image is not None:
		background_path = config.input_dir + "/" + config.background_image
	else:
		background_path = None

	#Create a point to hold the iteration
	p = Point()
	p.add_iteration()

	# The pool only exists when drawing is requested; every later use is
	# guarded so that running without display no longer raises NameError.
	pool = mp.Pool(config.num_processors) if config.display else None

	for t in xrange(min(ps.max_time_steps, len(Ns))):
		if pool is not None:
			pool.apply_async(draw_population, [Ns[t], landscape, ps.totalK, t, output_dir, 2.0, background_path])

		p.add_time_step([t] + population_statistics(ps, landscape, Ns[t]))

	if pool is not None:
		pool.close()

	#Write the iteration results to file as a trajectory containing a single point
	write_trajectories([Trajectory(points=[p])], None, ps.sentinels, output_dir + "/results.txt")

	if config.save_time_steps:
		np.savez(output_dir + "/populations.npz", *Ns)

	# Wait for the drawing workers last so they overlap with the file writes.
	if pool is not None:
		pool.join()
    def run_abstraction_parallel(self):
        """Run the experiment for every file in parallel and write the results.

        Every ``(filename, properties)`` pair of ``self.files`` is passed to
        ``self`` (the instance is used as the callable) on a pool with one
        process per CPU.  The results are written as CSV rows to
        ``self.files['evaluation_file']``, preceded by a header row.  The
        header is taken from the configuration when the 'abstraction' option
        is '1' and is empty otherwise.
        """
        # initialization
        self.__get_methods()
        self.__read_config()
        self.__get_dataset()

        # get filename and properties
        filename_properties = list(self.files.iteritems())

        # run experiment in multiprocessing mode
        total_cpu = multiprocessing.cpu_count()
        pool = NoDaemonProcessPool(processes=total_cpu)
        try:
            results = pool.map(self, filename_properties)
        finally:
            # Release the workers even when one experiment raised.
            pool.close()
            pool.join()

        # open evaluation file; the with-block closes it on errors too
        self.__check_path(self.files['evaluation_directory'])
        with open(self.files['evaluation_file'], 'wt') as f:
            writer = csv.writer(f)

            # set header for evaluation file
            header = []
            if self.configuration['main']['abstraction'] == '1':
                header = self.configuration['abstraction_evaluation']['evaluation_file_header'].split('\n')
            writer.writerow(tuple(header))

            # write experiment result
            for result in results:
                writer.writerow(result)
Example #16
0
def main():
    """Run ``_run_simulation`` for each requested value and append to a CSV.

    Command line: ``<prog> [min_yeses] [out_csv_file]``.  ``min_yeses`` is
    evaluated as a Python expression and iterated.  One simulation per value
    is scheduled on a thread pool with one thread per CPU, and every task
    receives the same CSV writer.  Prints the syntax and exits with status 1
    when fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        print("Syntax:")
        print("  {} [min_yeses] [out_csv_file]".format(sys.argv[0]))
        sys.exit(1)

    min_yeses_expr, out_csv_file = sys.argv[1:3]
    # NOTE(review): eval() executes arbitrary code taken from the command
    # line; only run this with trusted arguments.
    min_yeses = eval(min_yeses_expr)

    pconfig = config.PaperworkConfig()
    pconfig.read()

    src_dir = pconfig.settings['workdir'].value
    print("Source work directory : {}".format(src_dir))
    src_dsearch = docsearch.DocSearch(src_dir)
    src_dsearch.reload_index()

    pool = multiprocessing.pool.ThreadPool(
        processes=multiprocessing.cpu_count())

    with open(out_csv_file, 'a', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        for min_yes in min_yeses:
            task_args = (src_dsearch, min_yes, csvwriter,)
            pool.apply_async(_run_simulation, task_args)
        # Wait inside the with-block so the file stays open for the workers.
        pool.close()
        pool.join()
    print("All done !")
Example #17
0
 def test():
     """Run ``work`` on five random arguments in a MyPool and print the results."""
     print("Creating 5 (non-daemon) workers and jobs in main process.")
     pool = MyPool(5)
     # Five random integers between 1 and 5, one per job.
     job_args = [randint(1, 5) for _ in range(5)]
     outcome = pool.map(work, job_args)
     pool.close()
     pool.join()
     print(outcome)
Example #18
0
def run_trajectory(t, ps, landscape, ptv, num_iterations, num_processors):
    """Run the simulations for every distinct point of a trajectory.

    Consecutive points that compare equal (for example because of rounding)
    are simulated only once, for the first of them.  Each distinct point gets
    its own modified copy of the parameter set and is processed by
    ``run_iterations_for_point`` in a process pool.

    Returns a new trajectory built from the point list, in which the
    simulated points have been replaced by their results.
    """
    points = t.points()

    # Indices of points that differ from their predecessor (index 0 always counts).
    unique_indices = []
    for idx, point in enumerate(points):
        if idx == 0 or not point.equals(points[idx - 1]):
            unique_indices.append(idx)

    # Never start more workers than there are points.
    pool = mp.Pool(processes=min(num_processors, len(points)))

    pending = []
    for idx in unique_indices:
        # Tailor a private copy of the parameter set to this point.
        psm = ps.copy()
        psm.modify_for_point(points[idx], ptv)
        psm.convert_to_age_classes()

        task_args = [points[idx], psm, landscape, num_iterations, num_processors]
        pending.append((idx, pool.apply_async(run_iterations_for_point, task_args)))

    pool.close()
    pool.join()

    # Put each returned point back at the index it came from.
    for idx, async_result in pending:
        points[idx] = async_result.get(None)

    return io.Trajectory(points=points)
Example #19
0
def from_carrays(path, format_categories='bcolz', format_codes='bcolz', format_values='bcolz', parallel=True):
    """Load every entry under ``path`` as a column and build a DataFrame.

    Each entry matched by ``<path>/*`` is read with ``_from_carray``, which
    returns a ``(meta, series)`` pair; the series is stored under
    ``meta['name']``.  With ``parallel`` set the reads run on a thread pool,
    otherwise they run one after another.

    Raises AssertionError when ``path`` does not exist.
    """
    assert os.path.exists(path), 'No path {}'.format(path)
    column_paths = glob.glob(os.path.join(path, '*'))
    formats = {
        'format_categories': format_categories,
        'format_codes': format_codes,
        'format_values': format_values,
    }

    if parallel:
        pool = multiprocessing.pool.ThreadPool()
        pending = [
            pool.apply_async(_from_carray, args=(column_path,), kwds=formats)
            for column_path in column_paths
        ]
        pool.close()
        pool.join()
        loaded = (async_result.get() for async_result in pending)
    else:
        loaded = (
            _from_carray(column_path, **formats) for column_path in column_paths
        )

    columns = dict()
    for meta, s in loaded:
        columns[meta['name']] = s

    # # # this is slow when we have non categoricals as series for some reason
    with log.timedlogger('constructing dataframe from %s column dict' % len(columns)):
        df = pandas.DataFrame(columns)  # TODO: fast DataFrame constructor

    return df
Example #20
0
def refine(L, X, D, e, a, b, k, num_workers, metric):
    """ Throw out bad points (algorithm 7, lines 7-17), then grow the result.

    ``refine_individual`` is applied to every subset in L on a process pool
    of default size, and the refined list is handed to ``grow``.

    :param L: List of subsets
    :param X: Data matrix
    :param D: dictionary
    :param e: lower bound on fractional size of each cluster
    :param a: lower bound on fractional size of a set inside own cluster for which stability holds
    :param b: lower bound on fractional size of a set outside own cluster for which stability holds
    :param k: Number of clusters
    :param num_workers: Number of workers (forwarded to ``grow``; the pool
        here is created with its default size)
    :param metric: metric is in {avg, max, min}
    :return: Whatever ``grow`` returns for the refined list
    """
    print("Getting rid of bad points")
    print("Length of L at start = ", len(L))
    began = time.time()
    n = len(X)
    # Two size thresholds derived from the fractional bounds.
    T = int((e - 2 * a - b * k) * n)
    t = int((e - a) * n)
    prune = partial(refine_individual, D, T, t)
    with Pool() as pool:
        L = pool.map(prune, L)
        pool.close()
        pool.join()
    elapsed = time.time() - began
    print("Length of L on end = ", len(L))
    print("time = {0:.2f}s".format(elapsed))
    return grow(L, X, a, num_workers, metric)
def create_process_pool(index):
    """Print ``index`` and run ``print_process_index`` for three sub-indices.

    A pool with one process per sub-index (0, 1, 2) is created and each task
    receives ``(index, sub_index)``.  Returns once all tasks have finished.
    """
    # The parenthesised form prints the same on Python 2 and also runs on
    # Python 3.
    print(index)
    sub_indices = range(3)
    pool = multiprocessing.Pool(processes=len(sub_indices))
    for sub_index in sub_indices:
        pool.apply_async(print_process_index, (index, sub_index))
    pool.close()
    pool.join()
Example #22
0
 def imap(self, f, s, chunksize=1):
     """Yield ``f(x)`` for every ``x`` in ``s``, computed in a process pool.

     ``f`` is registered in the module-level ``_FUNCTIONS`` table under its
     ``id`` and workers receive a ``PicklableAndCallable`` built from that
     key instead of ``f`` itself.  The pool has ``self.size`` processes, each
     initialised with ``self._initWorkerProcess``.

     The registry entry is removed and the pool closed when the generator
     finishes, is closed early, or a task raises.
     """
     key = id(f)
     # Register before the pool is created, as the original code does.
     _FUNCTIONS[key] = f
     pool = None
     try:
         proxy = PicklableAndCallable(key)
         pool = multiprocessing.Pool(self.size, self._initWorkerProcess)
         for result in pool.imap(proxy, s, chunksize=chunksize):
             yield result
     finally:
         # pop() tolerates a concurrent imap of the same function having
         # already removed the entry.
         _FUNCTIONS.pop(key, None)
         if pool is not None:
             pool.close()
Example #23
0
    def work(num_procs):
        """Run ``sleepwhile`` on ``num_procs`` random arguments in a pool.

        Returns the list of results from the pool.
        """
        print("Creating %i (daemon) workers and jobs in child." % num_procs)
        pool = multiprocessing.Pool(num_procs)

        # One random integer between 1 and 5 per job.
        job_args = [randint(1, 5) for _ in range(num_procs)]
        outcome = pool.map(sleepwhile, job_args)
        pool.close()
        pool.join()
        return outcome
Example #24
0
def laminar(L, X, e, a, b, num_workers, metric):
    """ Make family laminar (Algorithm 9)

    Phase 1 maps ``non_laminar`` over the indices of L on a process pool and
    flattens the results into one list of intersections.  Phase 2 splits that
    list into ``num_workers`` equal slices (plus one slice for the remainder)
    and runs ``iterate_laminar`` on each slice in its own process, all
    sharing a manager list initialised from L.  Entries left as None in the
    shared list are dropped from the result.

    :param L: List of subsets
    :param X: The data set
    :param e: lower bound on the fractional size of every cluster
    :param a: lower bound on the fractional size of every set in own cluster for which stability holds
    :param b: lower bound on the fractional size of every set in outside cluster for which stability holds
    :param num_workers: number of workers
    :param metric: metric is in {avg, max, min}
    :return: Laminar list
    """
    print("Making the list laminar (parallel)")
    start = time.time()
    print("Computing pairs of non-laminar sets")
    with Pool(num_workers) as pool:
        func = partial(non_laminar, L)
        nested = pool.map(func, range(len(L) - 1))
        pool.close()
        pool.join()
    intersections = [item for sub_list in nested for item in sub_list]
    end = time.time()
    print("Length of intersections = ", len(intersections))
    print("time = {0:0.2f}s".format(end - start))
    print("Removing non-laminar pairs")
    start = time.time()
    manager = Manager()
    shared_L = manager.list(L)

    # Integer split: num_workers slices of `batch` items each, then one
    # extra slice holding the `rem` leftover items (if any).
    batch, rem = divmod(len(intersections), num_workers)
    bounds = [(i * batch, (i + 1) * batch) for i in range(num_workers)]
    if rem:
        tail = num_workers * batch
        bounds.append((tail, tail + rem))

    jobs = []
    for lo, hi in bounds:
        process = Process(
            target=iterate_laminar,
            args=(shared_L, X, e, a, b, num_workers, metric, intersections[lo:hi])
        )
        process.start()
        jobs.append(process)
    for p in jobs:
        p.join()
    L = [item for item in shared_L if item is not None]
    end = time.time()
    print("Length of list after removing non-laminar pairs = ", len(L))
    print("time = {0:.2f}s".format(end - start))
    return L
Example #25
0
def update_all(opts):
    """Updates all menus.

    The five updaters run concurrently on a five-process NoDaemonPool, each
    receiving ``opts``.  Returns once all of them have finished.
    """
    pool = NoDaemonPool(processes=5)
    updaters = (
        update_applications,
        update_bookmarks,
        update_recent_files,
        update_devices,
        update_rootmenu,
    )
    for updater in updaters:
        pool.apply_async(updater, (opts,))
    pool.close()
    pool.join()
Example #26
0
def test():
    """Run ``work`` for each year 2008-2013 on a CustomPool.

    The pool has four workers per year; the mapped results are discarded.
    """
    print("Creating 5 (non-daemon) workers and jobs in main process.")

    years = list(range(2008, 2014))
    pool = CustomPool(len(years) * 4)
    pool.map(work, years)
    pool.close()
    pool.join()
Example #27
0
def _ConvertToWebP(webp_binary, png_files):
  """Converts png files to webp on 10 threads and deletes the originals.

  Files ending in '.9.png' are skipped.  For every other file the converter
  is invoked with _PNG_TO_WEBP_ARGS, writing next to the source with a
  '.webp' extension, and the png is removed afterwards.
  """
  pool = multiprocessing.pool.ThreadPool(10)

  def convert_image(png_path):
    webp_path = os.path.splitext(png_path)[0] + '.webp'
    subprocess.check_call(
        [webp_binary, png_path] + _PNG_TO_WEBP_ARGS + [webp_path])
    os.remove(png_path)

  # Android requires pngs for 9-patch images.
  convertible = [f for f in png_files if not f.endswith('.9.png')]
  pool.map(convert_image, convertible)
  pool.close()
  pool.join()
Example #28
0
def work(num_procs):
    """Run ``sleepawhile`` on ``num_procs`` random arguments in a pool.

    Returns the list of results from the pool.
    """
    # The parenthesised form prints the same on Python 2 and also runs on
    # Python 3.
    print("Creating %i (daemon) workers and jobs in child." % num_procs)
    pool = multiprocessing.Pool(num_procs)

    result = pool.map(sleepawhile, [randint(1, 5) for x in range(num_procs)])

    # The following is not really needed, since the (daemon) workers of the
    # child's pool are killed when the child is terminated, but it's good
    # practice to cleanup after ourselves anyway.
    pool.close()
    pool.join()
    return result
Example #29
0
def parse_sam_in_threads(remap_csv, nthreads):
    """ Yield the results of parse_sam(), computed in a worker pool.

    The items produced by ``matchmaker(remap_csv)`` are fed to ``parse_sam``
    in chunks of 100 on a pool of ``nthreads`` workers, and the results are
    yielded in order.  The pool is closed and joined when the generator
    finishes, is closed early, or an error propagates.
    """
    workers = Pool(processes=nthreads)
    try:
        pairs = matchmaker(remap_csv)
        for parsed in workers.imap(parse_sam, iterable=pairs, chunksize=100):
            yield parsed
    finally:
        workers.close()
        workers.join()
Example #30
0
def _ConvertToWebP(webp_binary, png_files):
  """Converts png files to lossless webp on 10 threads, deleting the originals.

  Files whose path matches _PNG_WEBP_BLACKLIST_PATTERN are skipped.  Every
  other file is converted to a '.webp' file next to the source, and the png
  is removed afterwards.
  """
  pool = multiprocessing.pool.ThreadPool(10)

  def convert_image(png_path):
    webp_path = os.path.splitext(png_path)[0] + '.webp'
    command = [webp_binary, png_path]
    command += ['-mt', '-quiet', '-m', '6', '-q', '100', '-lossless']
    command += ['-o', webp_path]
    subprocess.check_call(command)
    os.remove(png_path)

  convertible = []
  for png_file in png_files:
    if not _PNG_WEBP_BLACKLIST_PATTERN.match(png_file):
      convertible.append(png_file)
  pool.map(convert_image, convertible)
  pool.close()
  pool.join()
Example #31
0
def start_multiprocess_obj(func_name,
                           params,
                           debug=False,
                           verbose=False,
                           nb_cpus=None):
    """
    Run ``multi_helper_obj`` over ``params``, in parallel when possible.

    Every task handed to ``multi_helper_obj`` is ``[func_name] + element``.
    The caller's ``params`` lists are left unmodified, so the same ``params``
    can safely be passed again.

    Parameters
    ----------
    func_name : str
    params : List[List]
        each element in params must be object with attribute func_name
        (+ optional: kwargs)
    debug : boolean
        forces a single CPU (serial execution)
    verbose : bool
        log the job size and the elapsed time
    nb_cpus : int
        requested number of workers; defaults to ``cpu_count()`` and is
        capped by both ``len(params)`` and ``cpu_count()``

    Returns
    -------
    result: List
        list of function returns
    """
    if nb_cpus is None:
        nb_cpus = cpu_count()

    if debug:
        nb_cpus = 1

    nb_cpus = min(nb_cpus, len(params), cpu_count())

    if verbose:
        log_mp.debug("Computing %d parameters with %d cpus." %
                     (len(params), nb_cpus))
    # Build fresh task lists rather than inserting into the caller's lists.
    tasks = [[func_name] + list(el) for el in params]
    start = time.time()
    if nb_cpus > 1:
        pool = MyPool(nb_cpus)
        try:
            result = pool.map(multi_helper_obj, tasks)
        finally:
            # Release the workers even when a task raised.
            pool.close()
            pool.join()
    else:
        result = list(map(multi_helper_obj, tasks))
    if verbose:
        log_mp.debug("Time to compute: {:.1f} min".format(
            (time.time() - start) / 60.))
    return result
def kmer_analysis_per_base(bam_path="/homes/gws/sdorkenw/rrna/data/alignments/SRR891244.Aligned.sortedByCoord.out.bam",
                  # fa_path="/homes/gws/sdorkenw/rrna/data/ref_genomes/GRCh38.p3_genomic.fa",
                  fa_path="/homes/gws/sdorkenw/reference_genome_38/GRCh38_o.p3.genome.fa",
                  save_path="/homes/gws/sdorkenw/rrna/data/kmer_analysis/",
                  quality_threshold=0, contiguous=False, k=14, n_workers=10):
    """Run the per-base k-mer analysis for every "chr" reference and save it.

    References whose name starts with "chr" are read from the BAM header and
    their sequences from the FASTA file.  ``kmer_analysis_per_base_thread``
    is run once per reference (on ``n_workers`` processes when
    ``n_workers > 1``, serially otherwise).  Element 0 of every result is
    collected as ribo coverage and element 1 as non-ribo coverage, and both
    lists are saved with ``np.save`` under ``save_path``.

    The output names contain ``k`` and the last run of digits found in
    ``bam_path``, with a ``_cont`` suffix when ``contiguous`` is set.
    """
    pfile = pysam.AlignmentFile(bam_path, "rb")
    references = []
    ref_lengths = {}
    try:
        for ref, length in zip(pfile.references, pfile.lengths):
            if ref.startswith("chr"):
                references.append(ref)
                ref_lengths[ref] = length
    finally:
        # Only the header is needed here; release the handle right away.
        pfile.close()

    # The parenthesised form prints the same on Python 2 and also runs on
    # Python 3.
    print("create reference sequences")
    reference_seqs = {}
    with open(fa_path) as fa_handle:
        for rec in SeqIO.parse(fa_handle, 'fasta'):
            if rec.id.startswith("chr"):
                reference_seqs[rec.id] = rec.seq

    multi_params = [
        [bam_path, ref, ref_lengths[ref], k, contiguous,
         reference_seqs[ref], quality_threshold]
        for ref in references
    ]

    if n_workers > 1:
        pool = NoDaemonPool(n_workers)
        try:
            results = pool.map(kmer_analysis_per_base_thread, multi_params)
        finally:
            pool.close()
            pool.join()
    else:
        results = list(map(kmer_analysis_per_base_thread, multi_params))

    coverage_nonribo = []
    coverage_ribo = []
    for result in results:
        coverage_ribo.append(result[0])
        coverage_nonribo.append(result[1])

    # Last run of digits in the BAM path identifies the sample in the names.
    sample_id = re.findall(r"[\d]+", bam_path)[-1]
    suffix = "_cont" if contiguous else ""
    np.save(save_path + "coverage_ribo_k%d_%s%s" % (k, sample_id, suffix), coverage_ribo)
    np.save(save_path + "coverage_nonribo_k%d_%s%s" % (k, sample_id, suffix), coverage_nonribo)
Example #33
0
def run_in_parallel(im, face):
    """Call ``find`` once per colour range in a process pool and write each output.

    One job ``[im, face, lower, upper]`` is built per colour from the
    module-level bound pairs. Every output is written with ``cv2.imwrite`` to
    ``<stem>_<colour>.png``, where ``<stem>`` is ``file_names[face]`` up to its
    first dot.
    """
    color_bounds = [
        ('white', lower_white, upper_white),
        ('blue', lower_blue, upper_blue),
        ('orange', lower_orange, upper_orange),
        ('green', lower_green, upper_green),
        ('red', lower_red, upper_red),
        ('yellow', lower_yellow, upper_yellow),
    ]
    jobs = [[im, face, lower, upper] for _, lower, upper in color_bounds]

    # One process per colour so all ranges are handled concurrently.
    workers = Pool(processes=len(jobs))
    outputs = workers.map(find, jobs)
    workers.close()
    workers.join()

    # pool.map preserves input order, so outputs line up with color_bounds.
    for (color, _, _), output in zip(color_bounds, outputs):
        target = file_names[face].split('.')[0] + '_{}.png'.format(color)
        cv2.imwrite(target, output)
Example #34
0
def process_jobs(jobs, task=None, num_threads=24, use_thread=False):
    """Execute parallelized jobs

    Parameters
    ----------
    jobs: list(dict)
        Each element contains `func` (the callable) and its keyword arguments
    task: str, optional
        The name of the task. If not specified, the name of the first job's
        function is used
    num_threads: int, optional
        The number of workers for parallelization. If None, use the number of
        CPUs. With a value of 1 (or less) the jobs run sequentially in the
        calling process
    use_thread: bool, default False
        If True, use a thread pool. If False, use a process pool.
        Use True if multiprocessing exceeds the memory limit

    Returns
    -------
    List: each element is the result of one job. In the parallel path results
    arrive in completion order, not submission order.
    """
    # Nothing to run; also avoids indexing jobs[0] below.
    if not jobs:
        return []
    if task is None:
        task = jobs[0]['func'].__name__
    if num_threads is None:
        num_threads = mp.cpu_count()
    out = []
    if num_threads > 1:
        if use_thread:
            pool = mp.pool.ThreadPool(processes=num_threads)
        else:
            pool = mp.Pool(processes=num_threads)
        try:
            outputs = pool.imap_unordered(expand_call, jobs)
            time0 = time.time()
            # Execute programs here
            for i, out_ in enumerate(outputs, 1):
                out.append(out_)
                report_progress(i, len(jobs), time0, task)
        except BaseException:
            # A job (or the progress reporter) failed: stop the workers
            # instead of leaving the pool running.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
    else:
        for job in jobs:
            # Copy so that removing 'func' does not mutate the caller's dict.
            kwargs = deepcopy(job)
            func = kwargs.pop('func')
            out.append(func(**kwargs))
    return out
Example #35
0
def _ConvertToWebP(webp_binary, png_files):
    pool = multiprocessing.pool.ThreadPool(10)

    def convert_image(png_path):
        root = os.path.splitext(png_path)[0]
        webp_path = root + '.webp'
        args = [
            webp_binary, png_path, '-mt', '-quiet', '-m', '6', '-q', '100',
            '-lossless', '-o', webp_path
        ]
        subprocess.check_call(args)
        os.remove(png_path)

    # Android requires pngs for 9-patch images.
    pool.map(convert_image, [f for f in png_files if not f.endswith('.9.png')])
    pool.close()
    pool.join()
Example #36
0
def multi_pz(args, f):
    """Submit one ``pz_worker`` task per entry of ``args.chips`` to a ``MyPool``.

    Each task receives the list ``[args, f, chip]``. The function blocks until
    the pool has been closed and joined, and returns None.
    """
    n_workers = 32
    workers = MyPool(processes=n_workers)
    tasks = [[args, f, chip] for chip in args.chips]
    print('Sending jobs to worker')

    # NOTE(review): the iterator returned here is never consumed, so worker
    # return values (and presumably worker exceptions) are dropped - confirm
    # that this is intended.
    results = workers.imap_unordered(pz_worker, tasks)
    workers.close()
    workers.join()
def AnalyzeAllSongs(music_directory):
    """Collect the ``.mp3`` names in ``music_directory`` and analyze them when run as a script.

    File names are listed with ``os.listdir`` and the trailing ``.mp3`` is
    stripped. When the defining module is not ``__main__`` that list of names
    is returned as-is. Otherwise ``AnalyzeSong`` is mapped over the names on a
    ``MyPool`` of 8 workers and the unordered result iterator is returned
    instead.
    """
    song_list = [entry[:-4] for entry in os.listdir(music_directory)
                 if entry.endswith(".mp3")]

    # Guard: worker processes are only spawned from the main module.
    if __name__ != '__main__':
        return song_list

    num_processes = 8
    pool = MyPool(num_processes)
    song_list = pool.imap_unordered(AnalyzeSong, song_list)
    pool.close()
    pool.join()
    return song_list
Example #38
0
def do_work(egene, odir, faa_files, ffn_files, cores=1):
    """Gather the protein sequences hit by one essential gene and align them.

    The target genes are the ``target_name`` values of the rows in the
    module-level ``hits`` table whose ``query_name`` equals ``egene``. Their
    sequences are looked up with ``scan_faa`` over ``faa_files``; any genes
    still missing are looked up with ``scan_ffn`` over ``ffn_files``. The
    sequences are written to ``<odir>/gene_set_<egene>.faa`` and aligned with
    ``run_muscle`` into the matching ``.msa`` file.

    Raises AssertionError when a record id appears twice, when the found ids
    and the target genes differ, or when MUSCLE exits with a non-zero status.
    """
    # Target genes: best hits for the given essential gene.
    matching_hits = hits.loc[hits['query_name'] == egene]
    set_genes = set(matching_hits['target_name'])
    logger.info('Essential gene %s: %d genes' % (egene, len(set_genes)))

    # Output files: FASTA with protein sequences, and the alignment.
    genes_faa = os.path.join(odir, 'gene_set_%s.faa' % egene)
    genes_faa_msa = "%s.msa" % os.path.splitext(genes_faa)[0]

    # gene id -> protein record
    prot_seqs = {}

    # Pass 1: protein FASTA files, one task per (file, target set) pair.
    faa_pool = Pool(cores)
    faa_tasks = itertools.product(faa_files, [set_genes])
    faa_results = faa_pool.starmap(scan_faa, faa_tasks)
    faa_pool.close()
    faa_pool.join()
    for records in faa_results:
        assert all(r_id not in prot_seqs for r_id in records.keys())
        prot_seqs.update(records)

    # Pass 2: nucleotide FASTA files, only for genes still missing.
    remaining = set_genes.difference(prot_seqs.keys())
    if len(remaining) > 0:
        logger.info('FFNs required for %d genes for SET %s' % (len(remaining), egene))
        ffn_pool = Pool(cores)
        ffn_tasks = itertools.product(ffn_files, [remaining], [True])
        ffn_results = ffn_pool.starmap(scan_ffn, ffn_tasks)
        ffn_pool.close()
        ffn_pool.join()
        for records in ffn_results:
            assert all(r_id not in prot_seqs for r_id in records.keys())
            prot_seqs.update(records)

    # Found ids and target genes must match exactly, in both directions.
    unexpected = set(prot_seqs.keys()).difference(set_genes)
    assert all(r_id in set_genes for r_id in prot_seqs.keys()), ';'.join(list(unexpected))
    missing = set_genes.difference(prot_seqs.keys())
    assert all(r_id in prot_seqs.keys() for r_id in set_genes), ';'.join(list(missing))

    # Write FASTA.
    with open(genes_faa, "w") as ofile:
        for gene in set_genes:
            SeqIO.write(prot_seqs[gene], ofile, "fasta")

    # Run MUSCLE.
    cmd, cmd_stdout, cmd_status = run_muscle(ifile=genes_faa, ofile=genes_faa_msa, params=__MUSCLE_PARAMS_PROT__)
    assert cmd_status == 0, '%s: %d: %s' % (cmd, cmd_status, cmd_stdout)
    return
def resolve_dns_parallel(site, resolver_ips):
    """Query every resolver for ``site`` in parallel and merge the answers.

    Args:
        site: Name to resolve; passed to ``worker`` with each resolver IP.
        resolver_ips: Iterable of resolver IP addresses.

    Returns:
        A flat list with the IPs returned by all resolvers, in resolver order,
        or None if any worker call raised (after printing "pool exception").
    """
    ip_list = []
    args_for_worker = [(site, resolver_ip) for resolver_ip in resolver_ips]
    pool = multiprocessing.pool.ThreadPool(processes=AMOUNT_OF_RESOLVERS)
    try:
        for ips in pool.imap(worker, args_for_worker):
            ip_list.extend(ips)
        return ip_list
    except Exception:
        print("pool exception")
        # Callers see None on failure, exactly as before; now it is explicit.
        return None
    finally:
        # Release the pool on both the success and the failure path.
        pool.terminate()
Example #40
0
def grid_thread(dataset,
                epochs,
                n_layers,
                neurons,
                activations=None,
                regularizations=None,
                optimizers=None,
                batch_size=[32],
                loss_fun=None,
                cvfolds=None,
                val_split=0,
                rlambda=None,
                verbose=0,
                val_set=None,
                val_loss_fun=None,
                trials=1):
    """Run ``grid_search`` ``trials`` times in parallel threads, one seed per trial.

    Seeds are ``0 .. trials - 1``. Every trial gets the same arguments except
    the seed and a fresh deep copy of ``optimizers``. Returns the list of the
    first element of each ``grid_search`` result, ordered by seed.
    """

    def run_trial(seed):
        # Built per call so each trial gets its own copy of the optimizers.
        search_kwargs = dict(
            epochs=epochs,
            batch_size=batch_size,
            n_layers=n_layers,
            val_split=val_split,
            activations=activations,
            regularizations=regularizations,
            rlambda=rlambda,
            cvfolds=cvfolds,
            val_set=val_set,
            verbose=verbose,
            loss_fun=loss_fun,
            val_loss_fun=val_loss_fun,
            neurons=neurons,
            optimizers=copy.deepcopy(optimizers),
            seed=seed,
        )
        fg, _grid_res, _pred = grid_search(dataset, **search_kwargs)
        return fg

    seeds = list(range(0, trials))

    # One thread per trial; chunksize=1 hands each seed to its own task.
    workers = multiprocessing.pool.ThreadPool(processes=trials)
    return_list = workers.map(run_trial, seeds, chunksize=1)

    workers.close()

    return return_list
Example #41
0
def send_block_to_network(node_list, our_node, block):
    """Post ``block`` to the nodes of ``node_list`` using a pool of 5 threads.

    The caller's list is deep-copied first, so the request fields set here
    never leak back into it. Every node whose ``id`` differs from
    ``our_node.id`` is configured as a POST to ``/post_block`` carrying
    ``block``; the whole list is then mapped through ``send_request``.

    Raises:
        Whatever ``send_request`` raises; the pool is shut down first.
    """
    node_list = copy.deepcopy(node_list)
    for node in node_list:
        if node.id != our_node.id:
            node.path = "/post_block"
            node.method = "POST"
            node.data = block

    # NOTE(review): our own node stays in the list and is passed to
    # send_request without the fields above - confirm send_request skips it.
    pool = multiprocessing.pool.ThreadPool(5)
    try:
        pool.map(send_request, node_list)
    except BaseException:
        # Stop the workers before propagating the error.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
Example #42
0
def resolve_dns(url_list):
    """Given a list of hosts, return dict that maps qname to
    returned rdata records.

    ``worker`` is called once per (host, record type) pair for the record
    types 'A' and 'NS'; each call yields ``(qname, rdatalist)`` and the lists
    are concatenated per qname. An empty ``url_list`` returns an empty mapping
    without starting any threads.
    """
    response_dict = collections.defaultdict(list)
    # ThreadPool rejects a size of 0, so handle "nothing to resolve" up front.
    if not url_list:
        return response_dict
    # create pool for queries but cap max number of threads
    pool = multiprocessing.pool.ThreadPool(
        processes=min(len(url_list) * 3, 60))
    try:
        # run for all combinations of hosts and qnames
        for qname, rdatalist in pool.imap(
                worker,
                itertools.product(url_list, ('A', 'NS')),
                chunksize=1):
            response_dict[qname].extend(rdatalist)
    finally:
        # Release the threads even when a worker call raised.
        pool.terminate()
    return response_dict
Example #43
0
def get_imgurls(keyword, keyword_url, keyword_file):
    """Collect the distinct image URLs found on every result page of a search.

    The search URL is requested once to learn the URL it redirects to, then
    that page is fetched to read the total page count from the "next_button"
    block (1 if the block is not found). One ``get_imgurl_worker`` job per
    page is run on a thread pool; workers receive
    ``(page_number, sub_category_url, queue)`` and the queue is drained
    afterwards.

    Args:
        keyword: Search keyword (only logged here).
        keyword_url: Keyword as it should appear in the search URL.
        keyword_file: File name for the keyword (only logged here).

    Returns:
        A list of unique queue entries, in arbitrary order.
    """
    logging.info(
        "keyword: %s\n"
        "keyword_url: %s\n"
        "keyword_file: %s",
        keyword,
        keyword_url,
        keyword_file,
    )

    search_url = f"https://wall.alphacoders.com/search.php?search={keyword_url}"
    sub_category_url = requests.get(search_url).url

    match = re.search(
        r"<div id='next_button'>.*?/([0-9]+)",
        requests.get(sub_category_url).text,
        re.MULTILINE | re.DOTALL,
    )
    num_pages = int(match.group(1)) if match else 1

    logging.info("total %s pages", num_pages)

    imgurl_queue = multiprocessing.Queue()

    # Pages are numbered from 1.
    jobs = [(page, sub_category_url, imgurl_queue)
            for page in range(1, num_pages + 1)]

    # os.cpu_count() may return None when the count cannot be determined.
    pool = multiprocessing.pool.ThreadPool(
        (os.cpu_count() or 1) * THREAD_PER_CPU)
    try:
        # The blocking map call is inside the try so that Ctrl-C during the
        # download phase actually reaches the handler below.
        pool.map(get_imgurl_worker, jobs)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print("KeyboardInterrrrrrrrrupt!")
        pool.terminate()
        pool.join()

    # NOTE(review): multiprocessing.Queue.empty() is documented as not
    # reliable; a queue.Queue would be safer for thread workers - confirm
    # before changing the type handed to get_imgurl_worker.
    res = []
    while not imgurl_queue.empty():
        res.append(imgurl_queue.get())
    return list(set(res))
Example #44
0
 def _getRnnFeature(self, mat):
     ''' Extract ask and bid Rnn features from sliding windows of ``mat``.

     A window ``mat[i:i + LOOK_BACK]`` is built for every ``i`` below
     ``len(mat) - self._cutoff``. ``_getRnnAskPrice`` and ``_getRnnBidPrice``
     are each mapped over the windows on their own ``MyPool``. Returns the
     two results as arrays ``(asks, bids)``.
     '''
     print("Extracting Rnn features...")
     windows = [np.array(mat[start:start + LOOK_BACK])
                for start in range(len(mat) - self._cutoff)]
     started = time.time()

     def run_on_fresh_pool(extractor):
         # Each extractor gets its own pool, which is torn down afterwards.
         workers = MyPool(NUM_THREADS)
         extracted = np.array(workers.map(extractor, windows))
         workers.close()
         workers.join()
         return extracted

     asks = run_on_fresh_pool(_getRnnAskPrice)
     bids = run_on_fresh_pool(_getRnnBidPrice)
     print("Extracting finished!")
     print("Extracting time: " + str(time.time() - started) + " s")
     print(asks.shape)
     print(bids.shape)
     return asks, bids
def extract_public_galaxy_servers_tools():
    """Extract the tools from the public Galaxy servers using their API.

    Every row of ``extract_public_galaxy_servers()`` is handed to
    ``fetch_and_extract_individual_server_tools`` on a pool of 20 threads.
    Falsy results are skipped; for the rest, element 0 becomes the key and
    element 1 the value of the returned dict.
    """
    servers = extract_public_galaxy_servers()
    rows = [row for _, row in servers.iterrows()]

    workers = multiprocessing.pool.ThreadPool(processes=20)
    fetched = workers.map(
        fetch_and_extract_individual_server_tools, rows, chunksize=1)
    workers.close()

    # Servers that produced nothing are left out of the mapping.
    return {entry[0]: entry[1] for entry in fetched if entry}
Example #46
0
def _ConvertToWebP(webp_binary, png_files):
    pool = multiprocessing.pool.ThreadPool(10)

    def convert_image(png_path):
        root = os.path.splitext(png_path)[0]
        webp_path = root + '.webp'
        args = [
            webp_binary, png_path, '-mt', '-quiet', '-m', '6', '-q', '100',
            '-lossless', '-o', webp_path
        ]
        subprocess.check_call(args)
        os.remove(png_path)

    pool.map(
        convert_image,
        [f for f in png_files if not _PNG_WEBP_BLACKLIST_PATTERN.match(f)])
    pool.close()
    pool.join()
Example #47
0
def test_recursive_parallel_reduce(workers = 5):
    """Submit ``parallel_reduce(sum, r)`` for three ranges to a ``RecursivePool``.

    The ranges are printed first. Each result is appended to a list by the
    async callback, and that list is printed once the pool has been joined.
    ``workers`` is accepted but not used.
    """
    pool = RecursivePool()

    ranges = [range(1, 5), range(2, 9), range(3, 7)]
    print(ranges)

    # Filled by the callbacks, so the order follows task completion.
    collected = []
    for span in ranges:
        pool.apply_async(parallel_reduce, [sum, span],
                         callback=collected.append)

    pool.close()
    pool.join()

    print(collected)
Example #48
0
def to_parquet(df, path, njobs=4):
    """Split ``df`` into up to ``njobs`` random groups and write each one in parallel.

    Args:
        df: Data frame to write; its rows are assigned to groups at random.
        path: Logical path, mapped through ``brtc_data_utils.make_data_path``.
            The resulting directory is created and must not exist yet.
        njobs: Number of random groups (group labels are ``0 .. njobs - 1``).

    Returns:
        The list of sub-paths, one per non-empty group.

    Raises:
        Whatever ``_write_parquet`` raised for any group. Such errors used to
        be dropped silently while the path was still returned.
    """
    print(path)
    path = brtc_data_utils.make_data_path(path)
    print(path)
    os.makedirs(path)

    paths = []
    pending = []
    pool = multiprocessing.pool.ThreadPool()
    try:
        # Each row draws a random group label in [0, njobs).
        for grp, sample in df.groupby(
                lambda _: np.random.choice(range(njobs), 1)[0]):
            sub_path = os.path.join(path, '{}'.format(grp))
            paths.append(sub_path)
            pending.append(
                pool.apply_async(_write_parquet, (sample, sub_path)))
        pool.close()
        pool.join()
    finally:
        # No-op after a clean close/join; stops the workers otherwise.
        pool.terminate()

    # Re-raise the first write failure instead of returning missing paths.
    for task in pending:
        task.get()
    return paths
Example #49
0
def _ConvertToWebP(webp_binary, png_files, path_info):
  pool = multiprocessing.pool.ThreadPool(10)
  def convert_image(png_path_tuple):
    png_path, original_dir = png_path_tuple
    # No need to add an extension, android can load images fine without them.
    webp_path = os.path.splitext(png_path)[0]
    args = [webp_binary, png_path, '-mt', '-quiet', '-m', '6', '-q', '100',
        '-lossless', '-o', webp_path]
    subprocess.check_call(args)
    os.remove(png_path)
    path_info.RegisterRename(
        os.path.relpath(png_path, original_dir),
        os.path.relpath(webp_path, original_dir))

  pool.map(convert_image, [f for f in png_files
                           if not _PNG_WEBP_BLACKLIST_PATTERN.match(f[0])])
  pool.close()
  pool.join()
Example #50
0
def send_node_list(node_list, our_node):
    """Best-effort broadcast of the serialized node list using 5 threads.

    The caller's list is deep-copied, serialized to UTF-8 JSON (with
    ``convert_to_dict`` as the fallback encoder), and every node whose ``id``
    differs from ``our_node.id`` is configured as a POST to
    ``/post_node_list`` carrying that payload. The whole list is then mapped
    through ``send_request``.

    Ordinary errors while serializing or sending are deliberately ignored;
    KeyboardInterrupt and SystemExit now propagate. Returns None.
    """
    node_list = copy.deepcopy(node_list)
    pool = multiprocessing.pool.ThreadPool(5)

    try:
        data = json.dumps(node_list, default=convert_to_dict).encode('utf-8')
        for node in node_list:
            if node.id != our_node.id:
                node.path = "/post_node_list"
                node.method = "POST"
                node.data = data
        pool.map(send_request, node_list)
    except Exception:
        # Deliberate best effort: an unreachable peer must not abort the caller.
        pass
    finally:
        pool.close()
        pool.join()
    def set_all_price(data):
        """Load the ``comp.secd`` price data in 21 chunks using a 2-worker ``MyPool``.

        One ``price_tup`` is built for each ``v`` in 100..120, with
        ``offset = v * 1000000`` and ``gvkey = v``, and
        ``set_data_parrallel_mode`` is mapped over them. The task tuple and
        the results are printed. ``data`` is accepted but not used.
        """
        # An earlier pass over the global table (comp.g_secd), and row-count
        # based chunking, used to live here as commented-out code.
        observ = 1000000

        pt = tuple(
            price_tup(library='comp',
                      table='secd',
                      observation=observ,
                      offset=v * 1000000,
                      global_=False,
                      gvkey=v)
            for v in range(100, 121)
        )
        print(pt)

        workers = MyPool(2)
        result = workers.map(set_data_parrallel_mode, pt)
        workers.close()
        workers.join()
        print(result)
Example #52
0
    def process(self):
        """
        Extract the archive into a temporary directory and run ``process_image``
        for every entry of its manifest on a thread pool.

        Raises whatever any of the image tasks raised. Logs the elapsed time
        on success.
        """
        started_at = time.time()
        with tempfile.TemporaryDirectory() as tmp_dir_name:

            self._logger.info(
                'Processing archive',
                archive_path=self._extractor.archive_path,
                parallel=self._parallel,
                tmp_dir_name=tmp_dir_name,
            )

            # Unpack everything up front: tarfile is not thread safe
            # (https://bugs.python.org/issue23649), so the worker threads
            # must only ever touch already-extracted files.
            self._extractor.extract_all(tmp_dir_name)

            manifest = self._get_manifest(tmp_dir_name)
            self._logger.debug('Extracted archive manifest', manifest=manifest)

            with multiprocessing.pool.ThreadPool(processes=self._parallel) as pool:
                pending = [
                    pool.apply_async(
                        process_image,
                        (self._logger, self._registry, tmp_dir_name, image_config),
                    )
                    for image_config in manifest
                ]

                # Wait for every task before the temporary directory goes away.
                pool.close()
                pool.join()

        # .get() re-raises any exception caught inside a pool worker
        for task in pending:
            task.get()

        elapsed = time.time() - started_at
        self._logger.info(
            'Finished processing archive',
            archive_path=self._extractor.archive_path,
            elapsed=humanfriendly.format_timespan(elapsed),
        )
def get_emd_inputs_for_current_pairs(current_origin_host, current_pairs, kmers_files, k, G, in_path_to_preprocessed_dir, in_path_to_results_output, build_partial_distance_matrix=False):
    """Build one argument tuple per pair, optionally with a partial distance matrix.

    When ``build_partial_distance_matrix`` is true, the matrix for
    ``current_origin_host`` is loaded from
    ``<in_path_to_preprocessed_dir>/partial_distance_matrix_for_origin_<basename>.npy``
    if that file exists. Otherwise it is computed with
    ``process_global_distance_matrix_entry`` and saved there. It has one row
    per k-mer of the origin host and one column per node of ``G``.

    Returns:
        A list with, for each pair in ``current_pairs``, the tuple
        ``(pair, kmers_files, k, G, current_origin_host, matrix,
        build_partial_distance_matrix, clean_kmers, in_path_to_results_output)``.
        ``matrix`` is an empty list when no matrix was requested, and
        ``clean_kmers`` is one empty list shared by all tuples.
    """
    distance_matrix_for_current_file_and_the_rest = []
    if build_partial_distance_matrix:
        cache_path = in_path_to_preprocessed_dir + '/partial_distance_matrix_for_origin_' + os.path.basename(current_origin_host) + '.npy'
        if os.path.isfile(cache_path):
            distance_matrix_for_current_file_and_the_rest = np.load(cache_path)
        else:
            kmers_in_current_origin = [str(i) for i in kmers_files[current_origin_host].keys()]
            dbg_node_labels = [node[0] for node in G.nodes(data='label')]

            # Rows are the origin's k-mers, columns are the graph nodes.
            n_rows = len(kmers_in_current_origin)
            n_cols = len(dbg_node_labels)

            print("New distance matrix height is %d" % (n_rows))
            print("New distance matrix width is %d" % (n_cols))

            # Emit the entries row by row (row index j outermost) so that the
            # flat result list matches the row-major reshape below. The
            # previous column-by-column order scrambled every non-square
            # matrix. Each tuple still carries (column i, row j, ...).
            # NOTE(review): .npy files cached by the old code hold the
            # scrambled layout and should be regenerated.
            inputs_list = [(i, j, dbg_node_labels, kmers_in_current_origin, G)
                           for j in range(n_rows)
                           for i in range(n_cols)]

            matrix_init_start_time = current_milli_time()
            print("Filling new distance matrix...")
            pool = multiprocessing.Pool(100)
            try:
                partial_distance_matrix_1d = pool.map(process_global_distance_matrix_entry, inputs_list)
            except BaseException:
                # Do not leave 100 worker processes behind on failure.
                pool.terminate()
                raise
            else:
                pool.close()
            finally:
                pool.join()
            # NOTE(review): the helper name suggests milliseconds while the
            # message says seconds - confirm the unit.
            print("Filling new distance matrix... Done. Took %d seconds." % (current_milli_time() - matrix_init_start_time))
            print(len(partial_distance_matrix_1d))

            partial_distance_matrix_np = np.reshape(partial_distance_matrix_1d, (n_rows, n_cols))
            np.save(cache_path, np.array(partial_distance_matrix_np))
            distance_matrix_for_current_file_and_the_rest = partial_distance_matrix_np

    # Shared placeholder handed to every pair.
    clean_kmers = []
    return [(pair, kmers_files, k, G, current_origin_host, distance_matrix_for_current_file_and_the_rest, build_partial_distance_matrix, clean_kmers, in_path_to_results_output)
            for pair in current_pairs]
Example #54
0
def train(seed, TrainingSet_Array, Labels_Array, List_TimeBatch, max_epoch,
          nTraj, i):
    """Run one ``DifferentTrainingSet`` call for ``seed`` in a single-process pool.

    The data for the seed is ``TrainingSet_Array[seed]`` and
    ``Labels_Array[seed]``; the batch time is ``List_TimeBatch[0]``.
    ``max_epoch`` is accepted but not used.

    Returns five one-element lists built from the call's result:
    ``[times], [rewards], [stds], [[likelihood]], [[likelihood_time]]``,
    taken from result positions 0, 3, 4, 1 and 2 respectively.
    """
    TrainingSet_tot = TrainingSet_Array[seed, :, :]
    Labels_tot = Labels_Array[seed, :, :]
    TimeBatch = List_TimeBatch[0]

    # A single task, executed in a separate worker process.
    workers = multiprocessing.Pool(processes=1)
    task_args = [(i, nTraj, TrainingSet_tot, Labels_tot, TimeBatch, seed)]
    outcomes = workers.starmap(DifferentTrainingSet, task_args)

    workers.close()
    workers.join()

    outcome = outcomes[0]
    times_online = np.append(np.empty((0)), outcome[0])
    rewards_online = np.append(np.empty((0)), outcome[3])
    stds_online = np.append(np.empty((0)), outcome[4])
    likelihoods_online = [outcome[1]]
    likelihood_times_online = [outcome[2]]

    return ([times_online], [rewards_online], [stds_online],
            [likelihoods_online], [likelihood_times_online])
Example #55
0
def apply(func, data, n_jobs=8):
    """
    Apply a function to every item of a list in parallel

    Params:
    ---------
    func : function The function to be parallelized
    data : list Items to which func is applied
    n_jobs : int Size of the MyPool used for the map

    Return:
    ---------
    list : Results in the same order as the items in data

    Example:
    ---------

    import parallel

    def power_of_two(x):
        return x**2

    parallel.apply(power_of_two, range(10))

    Out:
        [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

    """
    workers = MyPool(n_jobs)
    mapped = workers.map(func, data)

    # All results are in hand; shut the pool down before returning them.
    workers.close()
    workers.join()
    return mapped
Example #56
0
def downloadPackages(selected, cache, allowHashMismatch=False):
    """Download every payload of the selected packages into ``cache`` with 5 processes.

    Args:
        selected: Iterable of package dicts; entries without a "payloads" key
            are skipped.
        cache: Root cache directory. One sub-directory per package key is
            created under it.
        allowHashMismatch: Forwarded to ``_downloadPayload``.

    Prints the total of the values returned by ``_downloadPayload``.

    Raises:
        Whatever a download task raised; the pool is shut down first.
    """
    makedirs(cache)
    pool = multiprocessing.Pool(5)
    try:
        tasks = []
        for p in selected:
            if "payloads" not in p:
                continue
            package_dir = os.path.join(cache, getPackageKey(p))
            makedirs(package_dir)
            for payload in p["payloads"]:
                name = getPayloadName(payload)
                destname = os.path.join(package_dir, name)
                fileid = os.path.join(getPackageKey(p), name)
                args = (payload, destname, fileid, allowHashMismatch)
                tasks.append(pool.apply_async(_downloadPayload, args))

        # .get() blocks until each download finishes and re-raises failures.
        downloaded = sum(task.get() for task in tasks)
    except BaseException:
        # Stop outstanding downloads instead of leaking the workers.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    print("Downloaded %s in total" % (formatSize(downloaded)))
def resolve_dns(url_list):
    """
    Given a list of hosts, return dict that maps qname to
    returned rdata records.

    ``worker`` is called once per (host, record type) pair for the record
    types A, AAAA, PTR, CNAME, MX, NS, TXT and SOA; each call yields
    ``(qname, rdatalist)`` and the lists are concatenated per qname. An empty
    ``url_list`` returns an empty mapping without starting any threads.
    """
    response_dict = collections.defaultdict(list)
    # ThreadPool rejects a size of 0, so handle "nothing to resolve" up front.
    if not url_list:
        return response_dict
    # create pool for queries but cap max number of threads
    pool = multiprocessing.pool.ThreadPool(
        processes=min(len(url_list) * 3, 60))
    try:
        for qname, rdatalist in pool.imap(
                worker,
                itertools.product(
                    url_list,
                    # 'AAAA' is the IPv6 address record; 'AAA' is not a DNS
                    # record type.
                    ('A', 'AAAA', 'PTR', 'CNAME', 'MX', 'NS', 'TXT', 'SOA')),
                chunksize=1):
            response_dict[qname].extend(rdatalist)
    finally:
        # Release the threads even when a worker call raised.
        pool.terminate()
    return response_dict
Example #58
0
 def build_file_index(self):
     """Rebuild ``self.classes`` and ``self.filenames`` from the valid sub-directories.

     ``list_valid_filenames_in_directory`` is run for each sub-directory on a
     thread pool. The results are appended in sub-directory order. Also sets
     ``self.valid_subdirs``.
     """
     self.valid_subdirs = list_valid_subdirs(self.directory)
     pool = multiprocessing.pool.ThreadPool()
     self.filenames = []
     self.classes = []
     white_list_formats = ['jpeg', 'jpg', 'png']

     # Submit every directory first so that the scans overlap.
     pending = [
         pool.apply_async(list_valid_filenames_in_directory,
                          (os.path.join(self.directory, subdir),
                           white_list_formats,
                           self.class_indices, self.follow_links))
         for subdir in self.valid_subdirs
     ]

     # Collect in submission order; offset is the next free class slot.
     offset = 0
     for task in pending:
         classes, filenames = task.get()
         self.classes[offset:offset + len(classes)] = classes
         self.filenames += filenames
         offset += len(classes)
     pool.close()
     pool.join()
Example #59
0
def _ConvertToWebP(webp_binary, png_files):
  renamed_paths = dict()
  pool = multiprocessing.pool.ThreadPool(10)
  def convert_image(png_path_tuple):
    png_path, original_dir = png_path_tuple
    root = os.path.splitext(png_path)[0]
    webp_path = root + '.webp'
    args = [webp_binary, png_path, '-mt', '-quiet', '-m', '6', '-q', '100',
        '-lossless', '-o', webp_path]
    subprocess.check_call(args)
    os.remove(png_path)
    renamed_paths[os.path.relpath(webp_path, original_dir)] = os.path.relpath(
        png_path, original_dir)

  pool.map(convert_image, [f for f in png_files
                           if not _PNG_WEBP_BLACKLIST_PATTERN.match(f[0])])
  pool.close()
  pool.join()
  return renamed_paths
Example #60
0
def train(seed, TrainingSet_Array, Labels_Array, max_epoch, nTraj):
    """Run ``DifferentTrainingSet`` once per entry of ``nTraj`` on a 3-process pool.

    Every call receives ``(i, nTraj, training set, labels, seed)``.
    ``max_epoch`` is accepted but not used.

    Returns five one-element lists: the accumulated time, reward and std
    arrays (result positions 0, 3 and 4), and the lists of likelihoods and
    likelihood times (positions 1 and 2), each ordered like ``nTraj``.
    """
    TrainingSet_tot = TrainingSet_Array[:, :]
    Labels_tot = Labels_Array[:, :]

    workers = multiprocessing.Pool(processes=3)
    task_args = [(i, nTraj, TrainingSet_tot, Labels_tot, seed)
                 for i in range(len(nTraj))]
    outcomes = workers.starmap(DifferentTrainingSet, task_args)

    workers.close()
    workers.join()

    times_batch = np.empty((0))
    rewards_batch = np.empty((0))
    stds_batch = np.empty((0))
    likelihoods_batch = []
    likelihood_times_batch = []

    # starmap keeps input order, so outcomes line up with nTraj.
    for outcome in outcomes:
        times_batch = np.append(times_batch, outcome[0])
        likelihoods_batch.append(outcome[1])
        likelihood_times_batch.append(outcome[2])
        rewards_batch = np.append(rewards_batch, outcome[3])
        stds_batch = np.append(stds_batch, outcome[4])

    return ([times_batch], [rewards_batch], [stds_batch],
            [likelihoods_batch], [likelihood_times_batch])