Example #1
0
def parse_genetrees(args):
    """Clean a set of genetrees, serially or in parallel, and run them through PHYBASE.

    The input is treated as NEXUS when ``args.input_file`` ends in ``.nex`` or
    ``.nexus``.  The STAR and STEAC trees returned by ``Phybase.run`` are
    printed and also written next to the input file as
    ``<input without extension>.star_steac.trees``.

    :param args: parsed command-line arguments; this function reads
        ``input_file``, ``cores`` and ``outgroup`` and hands the whole object
        to ``get_genetree_chunks``.
    """
    is_nexus = args.input_file.endswith(('.nex', '.nexus'))
    chunks = get_genetree_chunks(args, is_nexus)
    print("Cleaning genetrees")
    if args.cores > 1:
        pool = Pool(args.cores)
        try:
            trees = pool.map(clean_genetree_worker, chunks)
        finally:
            # The pool only exists on this branch, so it is shut down here;
            # closing it unconditionally raised NameError on single-core runs.
            pool.close()
            pool.join()
    else:
        # list() keeps the result indexable even where map() is lazy.
        trees = list(map(clean_genetree_worker, chunks))
    # get taxa from first tree
    taxa = getTaxa(trees[0])
    # instantiate Phybase instance and analyse trees
    phybase = Phybase()
    star_tree, steac_tree = phybase.run(trees, args.outgroup, taxa)
    template = """#NEXUS\nbegin trees;\ntree 'STAR' = %s\ntree 'STEAC' = %s\nend;""" % (star_tree, steac_tree)
    print(template)
    out_path = os.path.splitext(args.input_file)[0] + '.star_steac.trees'
    with open(out_path, 'w') as star_steac_out:
        star_steac_out.write(template)
Example #2
0
def start_crawlers(connector_class, num_processes=1):
    """
    Starts a spider process for each spider class in the project

    When ``num_processes`` is 0 all spiders are crawled in the current
    process with a single connector instance, guarded by the 'ALL' lock.
    Otherwise the spider classes are handled in successive batches of at most
    ``num_processes`` spiders; each batch runs in its own process pool, which
    is shut down before the next batch starts.

    :param connector_class: the connector class that should be used by the
    spiders
    :param num_processes: the number of simultaneous crawling processes
    """
    spider_classes = pyjobs_crawlers.tools.get_spiders_classes()

    if num_processes == 0:
        connector = connector_class()
        with _get_lock('ALL') as acquired:
            if acquired:
                crawl(spider_classes, connector)
            else:
                print("Crawl process of 'ALL' already running")
        return

    # Split the spider classes into batches of at most num_processes items
    batches = [spider_classes[start:start + num_processes]
               for start in range(0, len(spider_classes), num_processes)]

    # Run one batch at a time, one worker process per spider in the batch
    for batch in batches:
        process_params = [(spider_class, connector_class)
                          for spider_class in batch]
        pool = Pool(len(process_params))
        try:
            pool.map(start_crawl_process, process_params)
        finally:
            # Without close()/join() every batch left its workers behind.
            pool.close()
            pool.join()
Example #3
0
def crawl_recursive_threaded(dirpath, ext):
    """Index ``dirpath`` by crawling each of its subfolders in a process pool.

    The pool size is taken from ``utils.Settings.config['processes']``.
    Returns a flat list holding everything ``crawl_recursive`` returned for
    the subfolders, followed by the ``DirInfo`` of ``dirpath`` itself.
    """
    from database import indexer
    from database import utils
    from multiprocessing import Pool

    # describe the root directory and its direct subfolders
    root_info = indexer.DirInfo(dirpath, ext)
    child_infos = indexer.dirs_to_info(root_info.subfolders(), ext)

    # comment if you want a silent indexing
    print(root_info.to_string())

    # fan the subfolders out to the pool
    # NOTE: child calls must not be pooled
    settings = utils.Settings.config
    workers = Pool(settings['processes'])
    per_child = workers.map(crawl_recursive, child_infos)
    workers.close()

    # flatten the per-subfolder lists, then add the root entry last
    flattened = []
    for found in per_child:
        flattened.extend(found)
    flattened.append(root_info)

    print('I was crawling with %d processes' % settings['processes'])

    return flattened
Example #4
0
def _rc_try_batch(funcChosen, batch, numOfThreads, tryn):
	"""Test one batch of candidate passwords in a pool; exit the program on a hit."""
	pool = Pool(numOfThreads)
	pool.map_async(funcChosen, batch, callback = exitPass)
	pool.close()
	# Wait for the workers and the callback before looking at the result;
	# checking right after close() raced with the still-running batch.
	pool.join()
	# NOTE(review): resultPass is a module-level name, presumably set by exitPass - confirm
	if resultPass:
		timeWasted = time.time()-start
		print('Found! Password is '+resultPass)
		print("It took " +str(round(time.time()-start,3))+" seconds")
		print("Speed: "+str(round(tryn/float(timeWasted),2))+" passwords/sec")
		print("Tried "+str(tryn)+" passwords")
		exit()

def rc(rf, alphabet, numOfThreads):
	"""Brute-force the password of archive ``rf`` over all strings built from ``alphabet``.

	Candidates are generated by increasing length (1 .. len(alphabet)),
	escaped with ``re.escape`` and tested in batches of ``numOfThreads``.
	The extractor is chosen from the file extension: ``unrar`` for .rar,
	``zipFileUnzip`` for .zip and .7z.  The program exits as soon as a batch
	reports a password; a progress line is printed every 1000 candidates.

	Raises ValueError for any other extension (previously this surfaced as
	an unbound-variable error on the first batch).
	"""
	if rf.endswith('.rar'):
		funcChosen = unrar
	elif rf.endswith('.zip') or rf.endswith('.7z'):
		funcChosen = zipFileUnzip
	else:
		raise ValueError('Unsupported archive type: '+rf)
	tryn=0
	counterTmp = 0
	printCounter = 1000
	listBasic = []
	for a in range(1,len(alphabet)+1):
		for b in itertools.product(alphabet,repeat=a):
			k=re.escape("".join(b))
			listBasic.append(k)
			tryn+=1
			if len(listBasic) == numOfThreads:
				_rc_try_batch(funcChosen, listBasic, numOfThreads, tryn)
				listBasic = []
			counterTmp+=1
			if counterTmp >= printCounter:
				print('Trying combination number '+str(tryn)+':'+str(k))
				timeWasted = round(time.time()-start,2)
				if timeWasted > 0:
					print("It took already " +str(timeWasted) +" seconds. Speed: "+str(round(tryn/float(timeWasted),2))+" passwords/sec")
				counterTmp=0
	# The last batch is usually smaller than numOfThreads and used to be dropped untested.
	if listBasic:
		_rc_try_batch(funcChosen, listBasic, numOfThreads, tryn)
def k_rbm(infile, outfile):
    """Assign every sample to the cheapest of five RBM runs and dump the result as JSON.

    Loads the ``'data'`` entry of the MATLAB file ``infile``, runs
    ``rbm_star`` five times in parallel (one call per thread name, all on the
    same data) and, for each sample index, records which run has the lowest
    cost.  The mapping ``{sample number (1-based): run number (1-5)}`` is
    written to ``outfile``.

    :param infile: path of the .mat file to read
    :param outfile: path of the JSON file to write
    """
    #dataset
    data = sio.loadmat(infile)['data']

    thread_names = ["Thread-1", "Thread-2", "Thread-3", "Thread-4", "Thread-5"]
    p = Pool(len(thread_names))
    try:
        a, b, c, d, e = p.map(rbm_star, [(name, data) for name in thread_names])
    finally:
        # the pool was previously left open after the map
        p.close()
        p.join()

    # the k-th run's cost vector is read from position k of its result tuple
    costs = (a[0], b[1], c[2], d[3], e[4])

    # find the cluster assignments
    cost_dict = {}
    for i in range(len(costs[0])):
        row = [cost[i] for cost in costs]
        lowest = min(row)
        # first run matching the minimum wins; the last run is the fallback
        cost_dict[i + 1] = next(
            (run for run, value in enumerate(row, 1) if value == lowest),
            len(row))

    # store results
    with open(outfile, 'w') as out:
        json.dump(cost_dict, out)
Example #6
0
def spawn_runpy(cp, wait=60, cb=check_rst):
    """Run every job queued in WAITQ through ``call_runpy`` in a process pool.

    Jobs are popped from the global ``WAITQ`` while fewer than
    ``CFG['MAXJOBS']`` are tracked in the global ``RUNQ``.  Running jobs are
    polled every ``wait`` seconds: a finished job is reported as 'pass' or
    'fail'; a job older than ``CFG['TIMEOUT']`` is reported as 'fail' and no
    longer tracked.  A single pool task cannot be cancelled, so stragglers
    are only killed when the pool is terminated at the end.

    :param cp: passed through as first argument of ``call_runpy``
    :param wait: seconds to sleep between polling rounds
    :param cb: optional callback invoked as ``cb(test, 'pass'|'fail', elapsed)``
    """
    global WAITQ, RUNQ, CFG
    pool = Pool(processes=CFG['MAXJOBS'])
    try:
        while len(WAITQ) > 0 or len(RUNQ) > 0:
            # fill the free slots
            while len(RUNQ) < CFG['MAXJOBS'] and len(WAITQ) > 0:
                path, test = WAITQ.pop()
                rst = pool.apply_async(call_runpy, (cp, path, test,))
                RUNQ.append((rst, test, timeit.default_timer()))

            # iterate over a copy: finished entries are removed from RUNQ
            for r in list(RUNQ):
                usec = float("%.2f" %(timeit.default_timer()-r[2]))
                if r[0].ready():
                    RUNQ.remove(r)
                    # successful() is only valid once the result is ready
                    if r[0].successful():
                        print("[{0}] success used {1} usec".format(r[1], usec))
                        status = 'pass'
                    else:
                        print("[{0}] unsuccess used {1} usec".format(r[1], usec))
                        status = 'fail'
                    if cb:
                        cb(r[1], status, usec)
                elif usec > CFG['TIMEOUT']:
                    print("[{0}] unsuccess used timeout {1} usec".format(r[1], usec))
                    # stop tracking it, otherwise the outer loop never ends
                    RUNQ.remove(r)
                    if cb:
                        cb(r[1], 'fail', usec)

            if len(WAITQ) > 0 or len(RUNQ) > 0:
                time.sleep(float(wait))
    finally:
        # kills any timed-out job that is still running and reaps the workers
        pool.terminate()
        pool.join()
Example #7
0
        def compress_file(self,corpus, np=4,separator=None):
                """
                Construct the WLZW pattern set out of a corpus, optionally in parallel.

                Every worker calls _compress_file with (corpus, index, np, separator)
                and the per-worker results are merged with _union.

                @param corpus - string, file path of the corpus
                @param np - number of processes, if np = 1 the algorithm is run in serial
                @param separator - the separator string to separate doc id and document. pass None if no doc id is given
                @return set, the final set containing all frequent patterns
                """

                #if only one process, no need for parallelization
                if np==1:
                        return set(_compress_file((corpus,0,np,separator)))

                pool=Pool(processes=np)
                try:
                        jobs=[(corpus,i,np,separator) for i in range(np)]
                        # np > 1 here, so the partial results always have to be merged
                        return _union(pool.imap_unordered(_compress_file,jobs,1))
                finally:
                        # the pool was previously never shut down
                        pool.close()
                        pool.join()
Example #8
0
    def get(self):
        """Handle a coverage request for the `lang` and `q` request arguments.

        Responds with 400 when `q` is empty or when the language mode is not
        in `self.analyzers`, with 408 when the coverage computation does not
        finish within `self.timeout`, and otherwise sends the computed
        coverage wrapped in a one-element list.

        NOTE(review): the body uses `yield`, so this presumably runs under a
        tornado coroutine-style decorator that is not visible here - confirm.
        """
        mode = toAlpha3Code(self.get_argument('lang'))
        text = self.get_argument('q')
        if not text:
            self.send_error(400, explanation='Missing q argument')
            return

        # Translates the worker's outcome into a response: None means timeout.
        def handleCoverage(coverage):
            if coverage is None:
                self.send_error(408, explanation='Request timed out')
            else:
                self.sendResponse([coverage])

        if mode in self.analyzers:
            # One-shot, single-process pool so the computation can be killed on timeout.
            pool = Pool(processes=1)
            result = pool.apply_async(getCoverage, [text, self.analyzers[mode][0], self.analyzers[mode][1]])
            pool.close()

            # Waits for the result and reports it (or None on timeout) through the callback.
            @run_async_thread
            def worker(callback):
                try:
                    callback(result.get(timeout=self.timeout))
                except TimeoutError:
                    pool.terminate()
                    callback(None)

            coverage = yield tornado.gen.Task(worker)
            handleCoverage(coverage)
        else:
            self.send_error(400, explanation='That mode is not installed')
Example #9
0
def main(path, out, cores):
    """
    Compute the contact energy of every .pdb file in `path` and store the
    results as CSV in `out`.

    :param path: str, directory that is scanned for .pdb files
    :param out: str, path of the CSV file to write
    :param cores: int, number of processes of the pool handed to the
        energy computation
    :return: None
    """
    # Keep the directory entries whose extension is .pdb (case-insensitive)
    pdb_names = [entry for entry in os.listdir(path)
                 if os.path.splitext(entry)[1].lower() == ".pdb"]
    total = len(pdb_names)
    # Print few newlines to prevent progressbar from messing up the shell
    print("\n\n")
    # One pool is shared by all energy computations
    workers = Pool(processes=cores)
    energies = []
    for index, pdb_name in enumerate(pdb_names):
        updateprogress(pdb_name, index / total)
        energy = computecontactenergy(os.path.join(path, pdb_name), workers)
        energies.append((pdb_name, energy))
    workers.close()
    # Make 100% to appear
    updateprogress("Finished", 1)
    # One CSV row per pdb, after the header
    with open(out, "w") as handler:
        handler.write("PDB,Energy in kcal/mol\n")
        handler.writelines("{},{}\n".format(pdb_name, energy)
                           for pdb_name, energy in energies)
Example #10
0
class JobPool(object):

    """
    Holds a process pool together with the queue used to hand it analyses.
    """
    # both attributes are replaced per instance in __init__
    pool = None
    message_queue = None

    def __init__(self, max_instances=4):
        queue = Queue()
        self.message_queue = queue
        # execute_task is given to the pool as worker initializer and
        # receives the queue as its only argument
        self.pool = Pool(processes=max_instances,
                         initializer=execute_task,
                         initargs=(queue,))
        # make sure the workers are stopped at interpreter exit
        atexit.register(self.clear)

    def add_analysis(self, analysis):
        """
        Mark the analysis as started and put it on the message queue.
        """
        analysis.set_started()
        self.message_queue.put(analysis)

    def clear(self):
        """
        Terminate the pool and wait for its workers to exit.
        """
        workers = self.pool
        workers.terminate()
        workers.join()
Example #11
0
class YaraJobPool(object):

    """
    Holds a process pool together with the queue used to hand it yara tasks.
    """
    # both attributes are replaced per instance in __init__
    pool = None
    message_queue = None

    def __init__(self, max_instances=3):
        queue = Queue()
        self.message_queue = queue
        # execute_yara_task is given to the pool as worker initializer and
        # receives the queue as its only argument
        self.pool = Pool(processes=max_instances,
                         initializer=execute_yara_task,
                         initargs=(queue,))
        # make sure the workers are stopped at interpreter exit
        atexit.register(self.clear)

    def add_yara_task(self, yara_task):
        """
        Put the yara task on the message queue.
        """
        self.message_queue.put(yara_task)

    def clear(self):
        """
        Terminate the pool and wait for its workers to exit.
        """
        workers = self.pool
        workers.terminate()
        workers.join()
    def get_location(self):
        """

        Extracts the location of each pixel in the satellite image

        The grid covers half of the raster's columns and rows.  The column and
        row index of every grid cell is converted with pixel_to_coordinates
        (using the raster's geotransform) and then mapped through
        point_wrapper; the final list is stored in self.location_series.

        Sets: ncols, nrows, length_df, cols_grid, rows_grid, geotransform,
        location_series.

        """
        # Floor division keeps the sizes integral so they can be given to range()
        self.ncols = self.satellite_gdal.RasterXSize // 2
        self.nrows = self.satellite_gdal.RasterYSize // 2
        self.length_df = self.nrows * self.ncols
        print('Columns, rows %s %s' % (self.ncols, self.nrows))
        cols_grid, rows_grid = np.meshgrid(
                    range(0, self.ncols),
                    range(0, self.nrows))
        self.cols_grid = cols_grid.flatten()
        self.rows_grid = rows_grid.flatten()
        print('Checking the meshgrid procedure works')
        # getting a series of lat lon points for each pixel
        self.geotransform = self.satellite_gdal.GetGeoTransform()
        print('Getting locations')
        self.location_series = np.array(parmap.starmap(
                        pixel_to_coordinates,
                        zip(self.cols_grid, self.rows_grid),
                        self.geotransform,
                        processes = self.processes))
        print('Converting to Points')
        pool = Pool(self.processes)
        try:
            self.location_series = pool.map(
                            point_wrapper,
                            self.location_series)
        finally:
            # the pool was previously left running after the map
            pool.close()
            pool.join()
Example #13
0
def get_fractional_errors(R_star, L_star, P_c, T_c):
	"""
		Pass in "guess" conditions.
		Will then calculate inward and outward errors,

		The outward and inward integrations run concurrently in a
		two-process pool; the pool is shut down before returning.  The guess
		(relative to the modelparameters values) and the fractional errors
		are printed.

		Returns:
			dY - numpy array [dr, dl, dP, dT]: inward minus outward value
				of each of the four solution columns, taken from the last
				row of both solutions (over/undershoots; dx is handled
				outside this function)
	"""

	# reference values, only used to print the guess as a ratio
	P_c_0		= modelparameters.P_c # core pressure, [dyne cm^-2]
	T_c_0 		= modelparameters.T_c # core temperature, [K]
	R_star_0 	= modelparameters.R_star
	L_star_0 	= modelparameters.L_star

	print("")
	print("R: " + str(R_star / R_star_0))
	print("L: " + str(L_star / L_star_0))
	print("P: " + str(P_c / P_c_0))
	print("T: " + str(T_c / T_c_0))

	X 		= modelparameters.X
	Y 		= modelparameters.Y
	Z 		= modelparameters.Z
	mu 		= modelparameters.mu

	M_star 	= modelparameters.M_star
	m_fitting_point	= modelparameters.m_fitting_point

	pool = Pool(2)
	try:
		outward_results = pool.apply_async(integrate.integrate_outwards,
			[M_star, m_fitting_point, P_c, T_c, mu, X, Y, Z] )

		inward_results  = pool.apply_async(integrate.integrate_inwards,
			[M_star, m_fitting_point, R_star, L_star, mu, X, Y, Z] )

		m_outward, y_outward, infodict_outward 	= outward_results.get()

		m_inward, y_inward, infodict_inward 	= inward_results.get()
	finally:
		# a new pool is created on every call, so it must not be left behind
		pool.close()
		pool.join()

	dr = y_inward[-1,0] - y_outward[-1,0]
	dl = y_inward[-1,1] - y_outward[-1,1]
	dP = y_inward[-1,2] - y_outward[-1,2]
	dT = y_inward[-1,3] - y_outward[-1,3]

	dY = np.array([dr, dl, dP, dT])

	print('')
	print('fractional errors:')
	print("dR: " + str(dr / y_inward[-1,0]))
	print("dL: " + str(dl / y_inward[-1,1]))
	print("dP: " + str(dP / y_inward[-1,2]))
	print("dT: " + str(dT / y_inward[-1,3]))

	return dY
Example #14
0
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    """Process every article directory in parallel and write the lines to a file.

    The article titles are the distinct, non-empty ``Entity.sep_dir`` values.
    Each title is handed to ``process_wrapper`` together with the terms
    selected for ``entity_type``; the lines returned per article are written
    to ``output_filename`` in title order.
    """
    terms = select_terms(entity_type)

    # detach all objects and close the session before querying
    Session.expunge_all()
    Session.close()

    query = (Session.query(Entity.sep_dir)
             .filter(Entity.sep_dir!=None)
             .filter(Entity.sep_dir!='')
             .distinct())
    titles = [row[0] for row in query.all()]

    # parallel processing of articles
    workers = Pool()
    jobs = []
    for title in titles:
        jobs.append((title, terms, entity_type, None, corpus_root))
    doc_lines = workers.map(process_wrapper, jobs)
    workers.close()

    # serial processing for tests:
    #   doc_lines = []
    #   for title in articles:
    #       lines = process_article(title, terms, entity_type, None, corpus_root)
    #       doc_lines.append(lines)

    # write graph output to file
    print(output_filename)
    with open(output_filename, 'w') as out:
        for article_lines in doc_lines:
            out.writelines(article_lines)
Example #15
0
  def score_all_genes(self, graph, num_procs=1):
    """Score every gene in a process pool and store scores and percentiles.

    score_gene is called for each name from self.vd.gene_names() and yields
    (gene, cent_score, nn_score) tuples.  A score of -1 is excluded from the
    percentile distributions.  Top genes get both percentiles set to 1,
    genes with a -1 score get 0; all rows are written to the genes table.
    """
    scorer = partial(score_gene, graph=graph, top_genes=self.top_genes)
    workers = Pool(num_procs)
    scored = workers.map(scorer, list(self.vd.gene_names()))
    workers.close()

    # distributions the percentiles are computed against
    cent_hist = numpy.array([row[1] for row in scored if row[1] != -1])
    nn_hist = numpy.array([row[2] for row in scored if row[2] != -1])

    rows = []

    for gene, cent_score, nn_score in scored:
      if gene in self.top_genes:
        # edge case: gene is a top gene
        cent_perc, nn_perc = 1, 1
      elif cent_score == -1 or nn_score == -1:
        # edge case: gene isn't in network
        cent_perc, nn_perc = 0, 0
      else:
        cent_perc = scipy.stats.percentileofscore(cent_hist, cent_score) / 100.0
        nn_perc = 1 - scipy.stats.percentileofscore(nn_hist, nn_score) / 100.0

        print("gene:  %s\n  c:   %s\n  c_p: %s\n  n:   %s\n  n_p: %s" %
              (gene, cent_score, cent_perc, nn_score, nn_perc))

      rows.append((cent_score, cent_perc, nn_score, nn_perc, gene))

    update_sql = ("UPDATE genes SET cent_score = ?, cent_perc = ?, "
                  "nn_score = ?, nn_perc = ? WHERE name = ?")
    self.vd._c.executemany(update_sql, rows)
    self.vd._conn.commit()
Example #16
0
def main():
    """Hash a file chunk by chunk in a process pool and print the combined hash.

    Every chunk produced by ``chunks(file)`` is hashed with ``hashing`` in a
    pool sized to the CPU count.  The chunk hashes, each followed by
    ``":hash"``, are concatenated in order, and the hash of that string is
    printed.  Prints the help text and returns when called without
    command-line arguments.
    """
    parser = ArgumentParser(description="Speed up your SHA. A different hash style.")
    parser.add_argument('-1', '--sha1', action='store_true')
    parser.add_argument('-2', '--sha224', action='store_true')
    parser.add_argument('-3', '--sha256', action='store_true')
    parser.add_argument('-4', '--sha384', action='store_true')
    parser.add_argument('-5', '--sha512', action='store_true')
    parser.add_argument('-f', '--file', type=str, help="The path to the file")

    if len(sys.argv) == 1:
        parser.print_help()
        return

    # NOTE(review): args stays module-global, presumably because hashing() reads it - confirm
    global args
    args = parser.parse_args()

    # the with-block closes the input file, which was previously left open
    with open(args.file, 'rb') as big_file:
        pool = Pool(multiprocessing.cpu_count())
        try:
            hashtree = ''.join(
                chunk_hash + ":hash"
                for chunk_hash in pool.imap(hashing, chunks(big_file)))
        finally:
            # also runs when hashing fails, so no workers are left behind
            pool.terminate()
            pool.join()

    print(str(hashing(hashtree.encode('ascii'))))
Example #17
0
def main():
    """
    ---------------------------------------------------------------------------
    AUTHOR: Kyle Hernandez
    EMAIL: kmhernan@utexas.edu

    Calculate the distribution of polymorphic RAD loci across site classes.
    ---------------------------------------------------------------------------

    USAGE: python snp_locations.py gmatrix.tab file.gff out.tab n_threads

    ARGUMENTS:
    	gmatrix.tab - Tab-delimited genotype matrix file of variant sites
        file.gff    - GFF file
        out.tab     - Output file of counts
        n_threads   - The number of threads to run
    """

    # Load the GFF and SNP positions into dictionaries
    load_gff()
    intergenic = process_matrix()

    # Map:
    # Create a pool of n_threads workers and use them to process
    # scaffolds separately (gff_dict and n_threads are module-level names)
    ch_vals = sorted(gff_dict.keys())
    sys.stdout.write("Counting features...\n")
    pool    = Pool(processes=n_threads)
    try:
        ct_list = pool.map(process_dicts, ch_vals)
    finally:
        # the pool was previously never shut down
        pool.close()
        pool.join()

    # Reduce:
    # Process the list of dicts
    print_counts(intergenic, ct_list)
def matrix_vector_iteration_by_processes(A,x,k):
	"""Run matrix_vector_iteration_process for every vector of x in a process pool.

	The matrix and the vectors are exchanged with the workers through files
	in a temporary directory: A and each vector of x are saved there, every
	worker is called with (tmpdir, vector index, k), and the results are
	loaded back with load_x.  At most 6 processes are used.  The temporary
	directory is removed and the pool shut down even if a step fails.

	Returns a copy of x holding the loaded results (for 1-D x, the result
	loaded for index 0).
	"""
	# create a temporary directory to store the matrix and the vectors
	tmpdir = tempfile.mkdtemp()
	try:
		nvec = get_nvec(x)
		y = x.copy()

		save_matrix(tmpdir,A)
		for i in range(nvec):
			save_x(tmpdir,x,i)

		# start processes
		pool = Pool(processes=min(nvec,6))
		try:
			pending = [pool.apply_async(matrix_vector_iteration_process, (tmpdir,i,k))
				for i in range(nvec)]

			# fetch results (vector/matrix shape version)
			if x.ndim == 1:
				pending[0].get()
				y = load_x(tmpdir,0)
			else:
				for i, job in enumerate(pending):
					job.get()
					y[:,i] = load_x(tmpdir,i)
		finally:
			# join so no worker still uses tmpdir when it is deleted below
			pool.close()
			pool.join()
	finally:
		# remove temporary directory (with all it contains)
		shutil.rmtree(tmpdir)

	return y
Example #19
0
def fetch_imagery(image_locations, local_dir):
    """Fetch every image location in a pool with one process per CPU.

    Each entry of ``image_locations`` contributes its first two items, which
    are passed with ``local_dir`` as one tuple to ``fetch_imagery_uncurried``.
    The pool is closed whether or not the map succeeds.
    """
    workers = Pool(cpu_count())
    jobs = []
    for location in image_locations:
        jobs.append((location[0], location[1], local_dir))
    try:
        workers.map(fetch_imagery_uncurried, jobs)
    finally:
        workers.close()
Example #20
0
def compute_tdbf():
    """Recompute parsedSimple, td and bf for every row of entropy_DEM100.

    All rows are read from the 'bnc' database, processed by
    ``compute_tdbf_worker`` in a pool with one process per CPU, and the
    results are written back with one UPDATE per row inside a single commit.
    Progress is printed once per second while the workers run and after
    every 1000 updated rows.
    """
    conn = db_conn('bnc')
    cur = conn.cursor()
    # select keys and parsed from table
    sql = 'SELECT xmlID, divIndex, globalID, parsed FROM entropy_DEM100'
    cur.execute(sql)
    data = cur.fetchall()
    # initialize
    pool = Pool(multiprocessing.cpu_count())
    manager = Manager()
    try:
        # NOTE(review): workers presumably put one item on the queue per processed row - confirm
        queue = manager.Queue()
        # mp
        args = [(d, queue) for d in data]
        result = pool.map_async(compute_tdbf_worker, args, chunksize=5000)
        # manager loop
        while not result.ready():
            sys.stdout.write('\r{}/{} processed'.format(queue.qsize(), len(args)))
            sys.stdout.flush()
            time.sleep(1)
        print('\n all rows processed')
        processed_results = result.get()
    finally:
        # all results are in (or we are bailing out), so stop the workers
        # and the manager process instead of leaving them behind
        pool.terminate()
        pool.join()
        manager.shutdown()
    # update
    update_sql = 'UPDATE entropy_DEM100 SET parsedSimple = %s, td = %s, bf = %s WHERE xmlID = %s AND divIndex = %s AND globalID = %s'
    total = len(processed_results)
    for done, res in enumerate(processed_results, 1):
        xml_id, div_idx, g_id, sub_tree, td, bf = res
        cur.execute(update_sql, (sub_tree, td, bf, xml_id, div_idx, g_id))
        # report every 1000 rows (the old `i % 999` test drifted: 1000, 1999, ...)
        if done % 1000 == 0:
            sys.stdout.write('\r{}/{} updated'.format(done, total))
            sys.stdout.flush()
    conn.commit()
Example #21
0
	def downloadImages(self, dirName, urlData):
		"""Download every URL in urlData below <dirName>/pictures using a process pool.

		The local path of each image mirrors the path component of its URL.
		Every download is waited for at most self._timeout; downloads that
		fail or time out are counted and reported, and the progress bar is
		advanced for each success.
		"""
		child_folder = 'pictures'
		failures = 0
		dirName = os.path.join(dirName,child_folder)
		process_pool = Pool(processes=self._pool_size)
		results = []

		try:
			for ud in urlData:
				abs_img = os.path.join(dirName,urlparse(ud).path.strip('/'))
				try:
					os.makedirs(dirname(abs_img))
				except OSError:
					# best effort: the directory usually exists already; a real
					# failure shows up below as a failed download
					pass
				results.append( process_pool.apply_async( urllib.urlretrieve, [ ud,  abs_img ] ) )

			self.initialize_bar(max=len(results))
			for result in results:
				try:
					result.get(self._timeout)
				except Exception:
					failures += 1
				else:
					self.update_bar()
		finally:
			# stop downloads still running past their timeout and reap the workers
			process_pool.terminate()
			process_pool.join()

		# finish the bar once (it used to be finished a second time after the report)
		self.finish_bar()
		if failures: print("   Completed with errors: Downloaded {0}/{1}".format(len(results) - failures, len(results)))
Example #22
0
def get_needle_tips(images):
    """Get sample tips from images.

    Runs ``_get_ellipse_point`` on every image in a process pool and collects
    the results that are not None, in image order.

    :param images: sequence of images
    :return: list of the tip points found
    :raises ValueError: if no tip point is found, including when ``images``
        is empty
    """
    if len(images) == 0:
        # A pool cannot be created with zero processes, so fail with the
        # documented error instead of multiprocessing's own.
        raise ValueError("No sample tip points found.")

    # Do not make more processes than needed for the number of images.
    proc_count = min(len(images), multiprocessing.cpu_count())

    pool = Pool(processes=proc_count)
    try:
        results = [pool.apply_async(_get_ellipse_point, args=(image,))
                   for image in images]
        tips = [tip for tip in (result.get() for result in results)
                if tip is not None]
    finally:
        # the pool was previously never shut down
        pool.close()
        pool.join()

    if len(tips) == 0:
        raise ValueError("No sample tip points found.")

    return tips
Example #23
0
def main(world_folder, replacement_file_name):
    """Process every region file of an Anvil world in parallel and write output.txt.

    :param world_folder: path of the world folder
    :param replacement_file_name: optional JSON file; when not None its content
        replaces the module-level ``replacements`` before the workers start
    :return: 0 on success, 65 (EX_DATAERR) if the folder is not an Anvil world
    """
    global replacements
    world = nbt.world.WorldFolder(world_folder)
    logger = configure_logging()
    logger.info("Starting processing of %s", world_folder)
    if not isinstance(world, nbt.world.AnvilWorldFolder):
        logger.error("%s is not an Anvil world" % (world_folder))
        return 65 # EX_DATAERR
    if replacement_file_name is not None:
        logger.info("Using Replacements file: %s", replacement_file_name)
        with open(replacement_file_name, 'r') as replacement_file:
            replacements = json.load(replacement_file)
    # get list of region files, going to pass this into function to process region
    region_files = world.get_regionfiles()

    # Parallel
    q = Queue()
    lp = threading.Thread(target=logger_thread, args=[q])
    lp.start()
    try:
        p = Pool(initializer=process_init, initargs=[q,replacements], maxtasksperchild=1)
        try:
            region_data = p.map(process_region, region_files)
        finally:
            p.close()
            p.join()
    finally:
        # Always send None to the logging queue and wait for the logger
        # thread; if map() failed it used to be left waiting forever.
        q.put(None)
        lp.join()

    # Write output data
    write_block_data(region_data,"output.txt")
    return 0
def updateTranslation(args):
    """Download every file of the translation filemap into cache/<language>/.

    Reads ``language``, ``force_filemap_update`` and ``num_processes`` from
    ``args``.  Target directories are created as needed, the downloads run in
    a process pool when ``num_processes`` is greater than 1 (serially
    otherwise), and the current time is written to lastdownload.txt.
    """
    # Get map that contains (besides other stuff)
    #  the crowdin ID for a given file
    translationFilemap = getTranslationFilemapCache(args.language, args.force_filemap_update)

    # Collect valid downloadable files for parallel processing
    fileinfos = []
    for fileinfo in translationFilemap.values():
        filepath = os.path.join("cache", args.language, fileinfo["path"])
        # Create dir if not exists
        try:
            os.makedirs(os.path.dirname(filepath))
        except OSError as exc:
            # an already existing directory is fine, anything else is not
            if exc.errno != errno.EEXIST:
                raise
        fileinfos.append((fileinfo["id"], filepath))
    # Curry the function with the language
    performDownload = functools.partial(performPOTDownload, args.language)
    # Perform parallel download
    if args.num_processes > 1:
        pool = Pool(args.num_processes)
        try:
            pool.map(performDownload, fileinfos)
        finally:
            # the pool was previously never shut down
            pool.close()
            pool.join()
    else:
        for t in fileinfos:
            performDownload(t)
    #Set download timestamp
    timestamp = datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S")
    with open("lastdownload.txt", "w") as outfile:
        outfile.write(timestamp)
Example #25
0
  def __decrypt_file(self, private_d, public_n, keys, path_to_file, CRT, k):
    """Decrypt path_to_file, write the text to path_to_file + '.dec' and return it.

    The file content is cut into blocks as wide as the decimal representation
    of public_n (a trailing partial block is dropped) and leading zeros are
    stripped from each block.  With CRT set, the blocks are decrypted by
    compute_part_of_message in a pool of k processes and re-ordered by the
    index each result carries; otherwise each block is decrypted in-process
    with private_d and public_n.
    """
    block_width = len(str(public_n))
    # the pool is created before the file is read, and only in CRT mode
    pool = Pool(processes = k) if CRT else None

    with open(path_to_file, 'r') as source:
      encrypted_data = source.read()

    last_start = len(encrypted_data) - block_width
    blocks = [encrypted_data[pos:pos + block_width].lstrip('0')
              for pos in range(0, last_start + 1, block_width)]

    if CRT:
      pending = [pool.apply_async(self.compute_part_of_message, args=(block, keys, index))
                 for index, block in enumerate(blocks)]
      # each result is (character code, block index)
      ordered = sorted((job.get() for job in pending), key = lambda item: item[1])
      decrypted = ''.join(chr(item[0]) for item in ordered)
      pool.close()
    else:
      decrypted = ''.join(chr(self.__decrypt_message(block, private_d, public_n))
                          for block in blocks)

    with open(path_to_file + '.dec', 'w') as target:
      target.write(decrypted)
    return decrypted
Example #26
0
def multi_mode(start, stop):
    """Factorize every integer from start to stop (inclusive) in a 4-process pool.

    Prints the value of ``uniq_counter`` applied to the list of results.
    """
    print("going multi")
    from multiprocessing import Pool

    pool = Pool(processes=4)
    try:
        result = pool.map(factorize, xrange(start, stop + 1), chunksize=100)
    finally:
        # the pool was previously never shut down
        pool.close()
        pool.join()
    print(uniq_counter(result))
Example #27
0
class withPool:
    """Context manager owning a process pool, available as ``self.p``."""
    def __init__(self, procs):
        # init_func is handed to the pool as the worker initializer
        self.p = Pool(processes=procs, initializer=init_func)
    def __enter__(self):
        # the manager itself is bound by the with-statement
        return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        # close the pool on exit; exceptions from the body are not suppressed
        self.p.close()
Example #28
0
def mass_tri_plot(data, savedir, name='plot', Type='speed', Map=False, nframes=50):
    """
    Plots the time series frame by frame.  Makes use of multiprocessing for speed.

    The data to plot, the save directory and the grid are published as the
    module-level globals ``plotvar``, ``saveDir`` and ``grid`` before the
    pool is created; ``save_plot`` is then called with each frame index.

    :param data: dictionary holding 'trigrid' and the entry named by ``Type``
    :param savedir: directory the plots are saved in (created if missing)
    :param name: not used by this function
    :param Type: key of ``data`` to plot
    :param Map: not used by this function
    :param nframes: number of frames to plot, starting at index 0; the default
        keeps the previously hard-coded 50, None plots every time step
        (``data[Type].shape[0]``)
    :raises Exception: if ``Type`` is not a key of ``data``
    """
    trigrid = data['trigrid']
    #get the data to plot
    try:
        toPlot = data[Type]
    except KeyError:
        print(Type + " is not an element of data.  Please calculate it.")
        raise Exception("Invalid dictionary entry")
    #set the variable as a global variable
    global plotvar
    plotvar = toPlot
    global saveDir
    saveDir = savedir
    global grid
    grid = trigrid
    #see if the save directory exists, or make it
    if not os.path.exists(savedir):
        os.makedirs(savedir)
    frame_count = toPlot.shape[0] if nframes is None else nframes

    p = Pool(4)
    try:
        plt.gca().set_aspect('equal')
        p.map(save_plot, range(frame_count))
    finally:
        # the pool was previously never shut down
        p.close()
        p.join()
    clearall()
Example #29
0
def main():
    """Run ``worker`` over all jobs in a 4-process pool and pickle the results.

    The output file starts with the pickled number of jobs, followed by one
    pickled result per job in completion order.  Progress is shown on stdout;
    Ctrl-C stops the workers and exits.
    """
    import sys

    idir, ofile, dffile = _parse_cmdline()

    print(u'Loading doc-freqs file {}...'.format(dffile))
    # NOTE: pickle.load executes code embedded in the file; only load trusted files.
    with open(dffile, 'rb') as f:
        df = pickle.load(f)

    print(u'Reading input directory: {}'.format(idir))
    jobs = _load_jobs(idir, df)

    # Do the work.
    pool = Pool(4)
    njobs = len(jobs)
    # defined up front so the interrupt handler can always report progress
    per = 0.0

    try:
        with codecs.open(ofile, 'wb') as pf:
            pickle.dump(njobs, pf)
            results = pool.imap_unordered(worker, jobs)
            for i, result in enumerate(results, 1):
                pickle.dump(result, pf)
                per = 100 * (float(i) / njobs)
                sys.stdout.write(u'\rPercent Complete: {:2.3f}%'.format(per))
                sys.stdout.flush()
            sys.stdout.write(u'\rPercent Complete: 100%    \n')
            sys.stdout.flush()

    except KeyboardInterrupt:
        sys.stdout.write(u'\rPercent Complete: {:2.3f}%    \n'.format(per))
        sys.stdout.write(u'Shutting down.\n')
        sys.stdout.flush()
        sys.exit()
    finally:
        # stop the workers on every exit path (all results are consumed on success)
        pool.terminate()
        pool.join()

    print(u'Complete!')
Example #30
0
 def process(self):
     """Count keywords over all sitemap URLs and write the frequent ones to a file.

     The URL keys stored under self.sitemap_prefix are sorted and dumped to
     test_urls.txt, then handed to parse_content in rounds of 15 slices of
     10000 URLs each, using one 15-process pool for all rounds.  The counts
     of all slices are summed and every key with a total of at least 3 is
     written, most frequent first, to ./test_sitemap_keywords.  Any exception
     is logged instead of propagated.
     """
     slice_size = 10000
     slices_per_round = 15
     try:
         urls = redis_one.hkeys(self.sitemap_prefix)
         urls.sort()
         # the with-block closes the dump file, which was previously left open
         with open('test_urls.txt', 'w+') as ofh:
             ofh.write(('\n'.join(urls)).encode('utf8', 'ignore'))
         logger.error('total urls len %s' % len(urls))
         dict_res = defaultdict(int)
         # one pool for all rounds instead of a new, never-closed pool per round
         pool = Pool(processes=slices_per_round)
         try:
             i = 0
             # strictly less than: `<=` ran one extra round on empty slices
             while i < len(urls):
                 list_urls = [urls[i + j * slice_size:i + (j + 1) * slice_size]
                              for j in range(slices_per_round)]
                 for d in pool.imap(parse_content, list_urls):
                     for k, v in d.items():
                         dict_res[k] += v
                 logger.error('Parser %s %s' % (len(list_urls), len(dict_res)))
                 i += slice_size * slices_per_round
         finally:
             pool.close()
             pool.join()
         sorted_dict_res = sorted(dict_res.items(), key = lambda s: s[1], reverse=True)
         with open('./test_sitemap_keywords', 'w+') as ofh:
             ofh.write('\n'.join(['%s\t%s' % (k,v) for (k,v) in sorted_dict_res if v>=3]).encode('utf8', 'ignore'))
     except Exception:
         logger.error(traceback.format_exc())