Example #1
 def _itergroundings(self, simplify=False, unsatfailure=False):
     global global_bpll_grounding
     global_bpll_grounding = self
     if self.multicore:
         pool = Pool(maxtasksperchild=1)
         try:
             for gndresult in pool.imap(with_tracing(create_formula_groundings), self.formulas):
                 for fidx, stat in gndresult:
                     for (varidx, validx, val) in stat: 
                         self._varidx2fidx[varidx].add(fidx)
                         self._addstat(fidx, varidx, validx, val)
                     checkmem()
                 yield None
         except CtrlCException as e:
             pool.terminate()
             raise e
         pool.close()
         pool.join()
     else:
         for gndresult in imap(create_formula_groundings, self.formulas):
             for fidx, stat in gndresult:
                 for (varidx, validx, val) in stat: 
                     self._varidx2fidx[varidx].add(fidx)
                     self._addstat(fidx, varidx, validx, val)
             yield None
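The example above makes the grounder visible to the worker processes through a module-level global (global_bpll_grounding) that is set just before the pool is created, so forked workers inherit it. A minimal, self-contained sketch of the same idea, with hypothetical names and an explicit initializer (which also works under the spawn start method):

from multiprocessing import Pool

_shared = None  # hypothetical shared object, set once per worker process

def _init_worker(shared):
    # runs once in every worker; an explicit initializer is a more portable
    # alternative to relying on fork() copying module globals
    global _shared
    _shared = shared

def _ground(idx):
    # workers read the shared object without it being re-pickled per task
    return (_shared["name"], idx * idx)

if __name__ == "__main__":
    with Pool(initializer=_init_worker, initargs=({"name": "grounder"},)) as pool:
        print(pool.map(_ground, range(4)))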
Example #2
 def add_tree(self, iterations=-1, snapshot=False):
     """
     Multi-core, fully utilizes underlying CPU to create the trees
     of the forest and stores them into the forest's list of trees
     :param iterations: number of trees to make, -1 means use default setting
     :param snapshot: if True, compute the forest's error (sum of squares) after these trees are added
     :return: None
     """
     print("Adding trees:", iterations)
     if iterations == -1:
         iterations = self.default_tree_count
     #########################
     # MULTI-PROCESS
     #########################
     pool = Pool()  # creates one worker process per CPU core on the machine
     outputs = pool.map(make_tree, [(self.data_copy(), self.depthlimit, self.weak_learner)
                                    for _ in range(iterations)])
     pool.close()
     pool.join()
     self.trees.extend(outputs)  # get the trees created and store them
     #########################
     # SINGLE THREADED
     ########################
     #for i in range(iterations):
     #    tree = Tree(self.data, self.bagging, self.bag_ratio, self.depthlimit, self.weak_learner)
     #    self.trees.append(tree)  # get the trees created and store them
     if snapshot:
         self.sum_squares(len(self.trees))  # get error after each snapshot, if this command is run multiple times
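make_tree is not shown in this example; for pool.map it has to be a module-level (hence picklable) callable that takes a single argument, which is why the parameters are packed into one tuple per task. A rough, runnable sketch with a stand-in Tree class (the real constructor signature may differ):

from multiprocessing import Pool

class Tree:  # stand-in for the forest's real Tree class
    def __init__(self, data, depthlimit, weak_learner):
        self.data, self.depthlimit, self.weak_learner = data, depthlimit, weak_learner

def make_tree(args):
    # pool.map passes exactly one item per call, so unpack the packed tuple here
    data, depthlimit, weak_learner = args
    return Tree(data, depthlimit, weak_learner)

if __name__ == "__main__":
    with Pool() as pool:
        trees = pool.map(make_tree, [([1, 2, 3], 4, "stump") for _ in range(8)])
    print(len(trees), "trees built")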
Example #3
def work(host, port, processes, threads, times):
    pool = Pool(processes,
                lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
    p = Process(target=progress)
    p.daemon = True

    start = time.time()

    try:
        for chunk in divide(times, processes):
            pool.apply_async(thread, (host, port, threads, chunk))

        p.start()

        pool.close()
        pool.join()
        p.terminate()
        p.join()

    except KeyboardInterrupt:
        pool.terminate()
        p.terminate()
        p.join()
        pool.join()

    return time.time() - start
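The lambda passed as the pool initializer above makes every worker ignore SIGINT, so Ctrl-C is delivered only to the parent, which then decides whether to terminate the pool. A standalone sketch of the same pattern (a named function instead of a lambda also survives the spawn start method, where the initializer must be picklable):

import signal
from multiprocessing import Pool

def ignore_sigint():
    # workers ignore Ctrl-C; only the parent handles KeyboardInterrupt
    signal.signal(signal.SIGINT, signal.SIG_IGN)

def job(x):
    return x * 2

if __name__ == "__main__":
    pool = Pool(initializer=ignore_sigint)
    try:
        print(pool.map(job, range(10)))
        pool.close()
    except KeyboardInterrupt:
        pool.terminate()
    finally:
        pool.join()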
Example #4
 def _itergroundings(self, simplify=True, unsatfailure=True):
     # generate all groundings
     if not self.formulas:
         return
     global global_fastConjGrounding
     global_fastConjGrounding = self
     batches = list(rndbatches(self.formulas, 20))
     batchsizes = [len(b) for b in batches]
     if self.verbose:
         bar = ProgressBar(width=100, steps=sum(batchsizes), color='green')
         i = 0
     if self.multicore:
         pool = Pool()
         try:
             for gfs in pool.imap(with_tracing(create_formula_groundings), batches):
                 if self.verbose:
                     bar.inc(batchsizes[i])
                     bar.label(str(cumsum(batchsizes, i + 1)))
                     i += 1
                 for gf in gfs: yield gf
         except Exception as e:
             logger.error('Error in child process. Terminating pool...')
             pool.close()
             raise e
         finally:
             pool.terminate()
             pool.join()
     else:
         for gfs in imap(create_formula_groundings, batches):
             if self.verbose:
                 bar.inc(batchsizes[i])
                 bar.label(str(cumsum(batchsizes, i + 1)))
                 i += 1
             for gf in gfs: yield gf
Example #5
def main(datadir, convert_dir, crop_size):
    try:
        os.mkdir(convert_dir)
    except OSError:
        pass

    filenames = data_util.get_image_files(datadir)

    print('Resizing images in {} to {}'.format(datadir, convert_dir))

    n = len(filenames)

    batch_size = 500
    batches = n // batch_size + 1
    p = Pool()

    args = []

    for f in filenames:
        args.append((convert_size, (datadir, convert_dir, f, crop_size)))

    for i in range(batches):
        print('batch {:>2} / {}'.format(i + 1, batches))
        p.map(convert, args[i * batch_size : (i + 1) * batch_size])

    p.close()
    p.join()
    print('Done')
Example #6
def main():
	global pool
	pool = Pool(POOL_SIZE)
	
	
	nseeds = 100
	
#	print("== generating seeds...")
#	generate_seeds(nseeds)
	
	#print("running const density experiments...")
	#run_constant_density(0.1, range(100, 1000, 100), nseeds)
	
	#print("running const size experiments...")
	#run_constant_size(50, range(100, 1000, 100), nseeds)
	
	print("== running aggregate interval experiments (const density)...")
#	run_aggregate_interval_constant_density(0.1, range(100, 1000, 100), nseeds, [100, 500] + list(range(1000, 4000, 1000)))

	run_aggregate_interval_constant_density(0.1, range(100, 1000, 100), nseeds, [3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 50000])
	reset_pool()
	run_aggregate_interval_constant_density(0.2, range(100, 1000, 100), nseeds, [100, 500, 1000, 2000,4000, 5000, 6000, 7000, 8000, 9000, 10000, 50000])
	reset_pool()
	run_aggregate_interval_constant_density(0.3, range(100, 1000, 100), nseeds, [100, 500, 1000, 2000,4000, 5000, 6000, 7000, 8000, 9000, 10000, 50000])
	reset_pool()
	run_aggregate_interval_constant_density(0.4, range(100, 1000, 100), nseeds, [100, 500, 1000, 2000,4000, 5000, 6000, 7000, 8000, 9000, 10000, 50000])
	reset_pool()
	run_aggregate_interval_constant_density(0.5, range(100, 1000, 100), nseeds, [100, 500, 1000, 2000,4000, 5000, 6000, 7000, 8000, 9000, 10000, 50000])

	pool.close()
	pool.join()
Example #7
def query_tweets(query, limit=None, begindate=dt.date(2006, 3, 21), enddate=dt.date.today(), poolsize=20, lang=''):
    no_days = (enddate - begindate).days
    if poolsize > no_days:
        # Since we are assigning each pool a range of dates to query,
        # the number of pools should not exceed the number of dates.
        poolsize = no_days
    dateranges = [begindate + dt.timedelta(days=elem) for elem in linspace(0, no_days, poolsize+1)]

    if limit:
        limit_per_pool = (limit // poolsize)+1
    else:
        limit_per_pool = None

    queries = ['{} since:{} until:{}'.format(query, since, until)
               for since, until in zip(dateranges[:-1], dateranges[1:])]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        logger.info('queries: {}'.format(queries))
        try:
            for new_tweets in pool.imap_unordered(partial(query_tweets_once, limit=limit_per_pool, lang=lang), queries):
                all_tweets.extend(new_tweets)
                logger.info('Got {} tweets ({} new).'.format(
                    len(all_tweets), len(new_tweets)))
        except KeyboardInterrupt:
            logger.info('Program interrupted by user. Returning all tweets '
                         'gathered so far.')
    finally:
        pool.close()
        pool.join()

    return all_tweets
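The linspace helper used to build dateranges is not shown here (presumably numpy's linspace or a small local variant). A dependency-free sketch of the same chunking, splitting the date span into poolsize contiguous (since, until) pairs:

import datetime as dt

def date_chunks(begindate, enddate, n):
    # n + 1 evenly spaced date edges over the span, then pair consecutive edges
    days = (enddate - begindate).days
    edges = [begindate + dt.timedelta(days=round(i * days / n)) for i in range(n + 1)]
    return list(zip(edges[:-1], edges[1:]))

print(date_chunks(dt.date(2006, 3, 21), dt.date(2006, 4, 21), 4))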
Example #8
class _MultiExecutor(_Executor):
    """Execute functions async in a process pool"""

    def __init__(self):
        super(_MultiExecutor, self).__init__()
        self._children = 0
        self.pool = Pool()

    def _collector(self, result):
        super(_MultiExecutor, self)._collector(result)
        self._children -= 1

    def execute(self, func, args):
        self._children += 1
        self.pool.apply_async(func, args, callback=self._collector)

    def wait_for_results(self):
        self.pool.close()
        # One would have hoped joining the pool would take care of this, but
        # apparently you need to first make sure that all your launched tasks
        # has returned their results properly, before calling join, or you
        # risk a deadlock.
        while self._children > 0:
            time.sleep(0.001)
        self.pool.join()
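The _children counter and sleep loop above work around the fact that apply_async returns immediately. An alternative sketch that avoids the busy wait is to keep the AsyncResult handles and block on them before joining:

from multiprocessing import Pool

def square(x):
    return x * x

if __name__ == "__main__":
    pool = Pool()
    handles = [pool.apply_async(square, (i,)) for i in range(8)]
    results = [h.get() for h in handles]  # blocks until every task has returned
    pool.close()
    pool.join()
    print(results)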
Example #9
def get_correlation_parallel(s1,s2):
    """
    params s1 - series 1
    params s2 - series 2 
    NOTE : series are numbered 1 to 25 when given as arguments
    returns the correlation between series
    """
    start = time.time()
    offsets = [] #this will be the arguments to all the parallel jobs
    instances = (MAX_ROWS/BATCH_SIZE)
    mean,std = calculate_mean_std_parallel()
    stripped_mean,stripped_std = calculate_stripped_mean_std_parallel(mean,std)
    processes = Pool(processes=instances)
    for i in range(instances):
        offsets.append((s1,s2,mean,std,stripped_mean,stripped_std,i*BATCH_SIZE))
    results = processes.map(get_correlation,offsets)
    processes.close()
    processes.join()
    pearson_corr = 0
    total = 0
    for result in results:
        pearson_corr += result[0]*result[1]
        total += result[1]
    pearson_corr = 1.0*pearson_corr / total
    t_value = abs(pearson_corr*math.sqrt( 1.0*(total - 2) / ( 1 - (pearson_corr*pearson_corr))))
    p_value = t.sf(t_value,total-2)
    print "\n ######### CORRELATION BETWEEN SERIES ",s1," AND SERIES ",s2, " is ",pearson_corr , "t value is ", t_value ," and p value is ", p_value,  "######### \n" 
    end = time.time()
    print "EXECUTION TIME : ", end-start , " sec"
    return pearson_corr
Example #10
File: local.py Project: mailund/gwf
    def start(self):
        """Starts a server that controls local workers.

        Calling this function starts a pool of `num_workers` workers used to run
        targets sent to the server. The server will run indefinitely unless shut
        down by the user.
        """
        try:
            serv = Listener((self.hostname, self.port))
            workers = Pool(
                processes=self.num_workers,
                initializer=Worker,
                initargs=(self.status, self.queue, self.waiting),
            )

            logging.info(
                "Started %s workers, listening on port %s",
                self.num_workers,
                serv.address[1],
            )
            self.wait_for_clients(serv)
        except OSError as e:
            if e.errno == 48:
                raise ServerError(
                    (
                        "Could not start workers listening on port {}. "
                        "The port may already be in use."
                    ).format(self.port)
                )
        except KeyboardInterrupt:
            logging.info("Shutting down...")
            workers.close()
            workers.join()
            self.manager.shutdown()
Example #11
def stat_volume(stime,etime):
    tgsinfo = read_tgs_info()

    # from multiprocessing.dummy import Pool as ThreadPool
    from multiprocessing.pool import Pool

    pool = Pool()
    volume = [pool.apply_async(stat_tgs_volume,args=(stime,etime,int(cid))) for cid in tgsinfo.keys()]
    pool.close()

    print 'waiting to join....'
    pool.join()

    print 'start to writing to file...'

    volume0 = []
    for i,elem in enumerate(volume):
        volume0.append((tgsinfo.keys()[i], elem.get()))
    volume0.sort(key=lambda x:x[1], reverse=True)

    total = 0
    with open(os.path.join(root_dir, "result", "volume.txt"),"w") as f:
        for i,elem in enumerate(volume0):
            # cid = tgsinfo.keys()[i]
            # vol = elem.get()
            total += elem[1]

            line = "%5s,%s: %d\n" % (elem[0], tgsinfo[elem[0]]['kkmc'], elem[1])
            f.write(line)

    print 'totally %d records.' % (total)
Example #12
def ingest(
        dataset,
        cls,
        skip_if_exists=True,
        multi_process=False,
        multi_threaded=False,
        cores=None):

    pool = None

    if multi_process:
        pool = Pool(cores or cpu_count())
        map_func = pool.imap_unordered
    elif multi_threaded:
        pool = ThreadPool(cores or cpu_count())
        map_func = pool.imap_unordered
    else:
        map_func = map

    cls_args = repeat(cls)
    skip_args = repeat(skip_if_exists)

    map_func(ingest_one, zip(dataset, cls_args, skip_args))

    if pool is not None:
        # if we're ingesting using multiple processes or threads, the processing
        # should be parallel, but this method should be synchronous from the
        # caller's perspective
        pool.close()
        pool.join()
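One subtlety worth noting: the plain map branch (Python 3) and imap_unordered both return lazy iterators, so nothing runs in the single-threaded case until the iterator is drained, and worker exceptions only surface when results are consumed. A small self-contained illustration of forcing evaluation:

from collections import deque
from itertools import repeat

def ingest_one(args):
    item, cls, skip = args
    print("ingesting", item, "cls =", cls, "skip_if_exists =", skip)

dataset = ["a", "b", "c"]
# deque(..., maxlen=0) drains the iterator so every call actually executes
deque(map(ingest_one, zip(dataset, repeat("SomeClass"), repeat(True))), maxlen=0)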
Example #13
def main():
    print('Process (%s) start...' % os.getpid())
    p = Pool()
    for i in range(4):
        p.apply_async(long_time_task, args=(i,))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')
Example #14
def load_images_uint(files):
    p = Pool()
    process = imread
    results = p.map(process, files)
    p.close()
    p.join()
    images = np.array(results)
    images = images.transpose(0, 3, 1, 2)
    return images
Example #15
def get_data():
	f2 = open('app_links1.txt','r')

	nprocs = 500 # nprocs is the number of processes to run
	ParsePool = Pool(nprocs)
	#ParsePool.map(btl_test,url)
	ParsedURLS = ParsePool.map(deatilsExtract,f2)
	ParsePool.close()
	ParsePool.join()
Example #16
def get_word():
    domains=open('dic/newwords').readlines()
    try:
        pool=Pool(processes=2)
        pool.map(check_domain,domains)
        pool.close()
        pool.join()
    except Exception as e:
        print e
        pass
Example #17
def calculate_stripped_mean_std_parallel(mean,std):
    """
    params - mean
    params - std
    returns the stripped mean and std
    """
    stripped_mean = []
    stripped_squares = []
    stripped_std = []
    dirty_data = []
    outliers = []
    for i in range(0,NO_OF_SERIES):
        stripped_std.append(0)
        stripped_squares.append(0)
        stripped_mean.append(0)
        dirty_data.append(0)
        outliers.append(0)
    start = time.time()
    offsets = [] #this will be the arguments to all the parallel jobs
    instances = (MAX_ROWS/BATCH_SIZE)
    processes = Pool(processes=instances)
    for i in range(instances):
        offsets.append((mean,std,i*BATCH_SIZE))
    results = processes.map(calculate_stripped_mean_std,offsets)
    processes.close()
    processes.join()
    total = 0
    for result in results:
        for i in range(len(result[0])):
            count = result[2] - result[3][i] #actual - dirty data
            stripped_mean[i] += result[0][i]*count
            stripped_squares[i] += result[1][i]*count
            dirty_data[i] += result[3][i]
            outliers[i] += result[4][i]
        total += result[2]

    for i in range(len(mean)):
        stripped_mean[i] = 1.0*(stripped_mean[i])/(total - dirty_data[i])
        stripped_squares[i] = 1.0*(stripped_squares[i]) / (total - dirty_data[i])
        stripped_std[i] = math.sqrt(stripped_squares[i] - (stripped_mean[i]*stripped_mean[i]))

    end = time.time()

    print "######### STRIPPED MEAN ######### \n"
    print stripped_mean
    print "\n ######### STRIPPED STANDARD DEVIATION ######### \n"
    print stripped_std
    print "\n######### NAN ROWS COUNT #########\n"
    print dirty_data
    print "\n######### OUTLIERS ROWS COUNT #########\n"
    print outliers
    print "\n######### EXECUTION TIME #########\n"
    print (end-start)

    return stripped_mean,stripped_std
Example #18
    def run(self):
        cases = self.get_test_case()
        # create a process pool
        pool = Pool(processes=len(cases))

        result.append(pool.map_async(self.init_driver, cases.values()))

        pool.close()
        pool.join()

        while not q.empty():
            comm.Template.set_middle(q.get())
Example #19
    def _get(self, args):
        draft_id = args[0]
        id = args[1] if len(args) > 1 else None

        q = self.db.query(Player)
        if id is not None:
            player = q.filter(Player.id == int(id)).first()
            team = self.db.query(Team).filter(and_(Team.is_owner == True,
                                                   Team.draft_id == draft_id)).first()

            available_players = self.db.query(Player).join(Player.core).filter(and_(PlayerCore.rank != None,
                                                                                    PlayerCore.target_price != None,
                                                                                    PlayerCore.points > 0,
                                                                                    Player.draft_id == draft_id,
                                                                                    Player.team_id == None,
                                                                                    Player.id != player.id)).order_by(PlayerCore.rank).all()

            min_price = 1
            max_price = min(player.core.target_price + 21, team.money)
            manager = Manager()
            max_starters_points = manager.dict()
            max_bench_points = manager.dict()
            pool = Pool(processes=8)
            starters, bench = get_starters_and_bench(self.db, team.id)
            max_starters_points[0] = optimizer.optimize_roster(starters, available_players, team.money - (constants.BENCH_SIZE - len(bench)))[1]
            for m in range(min_price, 10):
                pool.apply_async(wrap_optimizer, args=(starters, available_players, team.money - m - (constants.BENCH_SIZE - len(bench)) + 1, max_bench_points, m))

            full_starters = True
            for s in starters:
                if s is None:
                    full_starters = False
            if not full_starters:
                starters_clone = list(starters)
                bench_clone = list(bench)
                place_player(player, starters_clone, bench_clone)
                for m in range(min_price, max_price):
                    pool.apply_async(wrap_optimizer, args=(starters_clone, available_players, team.money - m - (constants.BENCH_SIZE - len(bench_clone)), max_starters_points, m))

            pool.close()
            pool.join()

            ret = player.to_dict(['core'])
            ret['max_starters_points'] = dict(max_starters_points)
            ret['max_bench_points'] = dict(max_bench_points)

            return ret
        else:
            players = q.join(PlayerCore).filter(and_(Player.draft_id == int(draft_id),
                                                     PlayerCore.rank != None,
                                                     PlayerCore.target_price != None)).all()
            return {'players': [p.to_dict(['core']) for p in players]}
Example #20
def parallel_augment(images, normalize=None, test=False):
    if normalize is not None:
        mean, std = normalize
        images = images - mean[:, np.newaxis, np.newaxis] # assuming channel-wise normalization
        images = images / std[:, np.newaxis, np.newaxis]

    p = Pool()
    process = partial(augment, test=test)
    results = p.map(process, images)
    p.close()
    p.join()
    augmented_images = np.array(results, dtype=np.float32)
    return augmented_images
Example #21
def calculate_mean_std_parallel():
    """
    call this function to compute the mean, standard deviation and NaNs for each series
    the file name, no of jobs can be changed in the settings file 
    """
    start = time.time()
    offsets = []
    instances = (MAX_ROWS/BATCH_SIZE)
    processes = Pool(processes=instances)
    for i in range(instances):
        offsets.append(i*BATCH_SIZE)
    print offsets
    result = processes.map(calculate_mean_std,offsets)
    processes.close()
    processes.join()
    mean = []
    std = []
    squares = []
    dirty_data = []
    #initializing
    for i in range(0,NO_OF_SERIES):
        mean.append(0)
        std.append(0)
        squares.append(0)
        dirty_data.append(0)

    total = 0
    ### here we combine the results from different processes / threads
    for r in result:
        for i in range(len(r[0])): ### update for each time series
            count = (r[2] - r[3][i])  ### actual count - the count with missing value
            mean[i] += r[0][i]*count
            squares[i] += r[1][i]*count
            dirty_data[i] += r[3][i]
        total += r[2]

    for i in range(len(mean)):
        mean[i] = 1.0*(mean[i])/(total - dirty_data[i])
        squares[i] = 1.0*(squares[i]) / (total - dirty_data[i])
        std[i] = math.sqrt(squares[i] - (mean[i]*mean[i]))
    end = time.time()
    print "######### MEAN ######### \n"
    print mean
    print "\n ######### STANDARD DEVIATION ######### \n"
    print std
    print "\n######### NAN ROWS COUNT #########\n"
    print dirty_data
    print "\n######### EXECUTION TIME #########\n"
    print (end-start)

    return mean,std
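The combine step above (weighting each worker's partial mean by its row count, then deriving the standard deviation from the accumulated sums of squares) can be condensed into a small self-contained sketch:

from multiprocessing import Pool

def partial_stats(chunk):
    # each worker returns (sum, sum of squares, count) for its chunk of rows
    return sum(chunk), sum(x * x for x in chunk), len(chunk)

if __name__ == "__main__":
    data = list(range(1000))
    chunks = [data[i:i + 100] for i in range(0, len(data), 100)]
    with Pool() as pool:
        parts = pool.map(partial_stats, chunks)
    total = sum(n for _, _, n in parts)
    mean = sum(s for s, _, _ in parts) / total
    variance = sum(sq for _, sq, _ in parts) / total - mean * mean
    print("mean", mean, "std", variance ** 0.5)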
Example #22
def main():
	"""
		Build all the models. Spin off a new process for each participant
		because the ANN library is not multithreaded. Process is used instead
		of thread to leverage multiple cores.
	"""
	parser = ArgumentParser()
	parser.add_argument("inputFilename")
	parser.add_argument("outputDirectory")
	
	args = parser.parse_args()
	inputFilename = args.inputFilename
	outputDirectory = args.outputDirectory
	
	data = pickle.load( open(inputFilename, 'rb') )
	
	tasks = [ 'matb', 'rantask' ]
	participantIds = [ '001', '002', '003', '004', '005', '006', '007' ]
	
	# Cut off first row header for each data set
	for task in tasks:
		for participantId in participantIds:
			data[participantId][task] = data[participantId][task][1:] 
			
	splits = performSplit( data )
	
	# Record start time so that the elapsed time can be determined
	start_time = time.time()
	
	# Create a multicore processing pool with 7 processes ( 7 so that one core stays free
	# for system processes )
	pool = Pool( processes = 7 )
	
	# Build models for participants in a task
	for task in tasks:
		for participantId in participantIds:
			outputFilename = path.join( outputDirectory, 'testingOn-' + participantId + '-' + task + '.txt' )
			
			# Spin off a process for the building
			pool.apply_async( tuneANN, ( splits[participantId][task], outputFilename ) )
			
	# Close down the pool so that we can wait on all the processes
	pool.close()
	pool.join()
	
	# Calculate and print the elapsed time
	elapsed_time = time.time() - start_time
	print( "Elapsed time: " + str(elapsed_time) )
Example #23
    def multi_proc5(self, batch):
        start_time = datetime.datetime.now()

        sql = "select count(id) from records"
        count_result = db_connection.execute(sql)

        for row in count_result:
            count = row[0]
            break

        sql = "select id from records"
        result = db_connection.execute(sql)

        record_ids = []
        for idx, row in enumerate(result):
            if (idx % int(count/4) == 0) or (idx == count - 1): #4 because that is how many workers we have
                if idx == 0:
                    some_records = []
                else:
                    record_ids.append(some_records)
                    some_records = []
            some_records.append(row[0])

        input_pool = Pool(4)
        #Add id messages to input queue
        input_pool.map(partial(add_batch_ids_to_queue, batch_size=int(batch)), record_ids)
        input_pool.close()
        input_pool.join()


        output_pool = Pool(4)
        #Read ids from input_queue, read message from DB and write it to output_queue
        worker_results = []
        for i in range(4):
            worker_results.append(output_pool.apply_async(read_id_from_queue, ()))

        output_pool.close()

        for r in worker_results:
            r.get() # This reports results, including errors, of workers

        output_pool.join() # This blocks until all the processes have finished

        end_time = datetime.datetime.now()
        time_taken = (end_time - start_time).total_seconds()

        return time_taken
Example #24
def create_training_parallel(count):
    pool_size = 8
    batch_count = pool_size * 5
    pool = Pool(pool_size)
    print("generating")
    results = []
    for i in range(batch_count):
        results.append(pool.apply_async(create_training_data, (count/batch_count,)))

    pool.close()
    pool.join()
    print("concatenating")

    output = []
    for r in results:
        output.extend(r.get(1000))
    return output
Example #25
def manager_process(dir_queue, file_queue, out_queue):
    """Dispatches and manages path and scanning workers.

    """
    pool = Pool(options.num_threads)
    atexit.register(at_exit_manager, pool)
    logging.info('Gathering Files...')
    pool.apply(explore_path, (dir_queue, file_queue))
    logging.info('Files gathered. Scanning %s files...', file_queue.qsize())
    logging.info('Starting %s scan processes...', options.num_threads)
    print '~' * 80
    thread.start_new_thread(print_status, (file_queue,))
    for _ in range(options.num_threads):
        pool.apply_async(parallel_scan, (file_queue, out_queue))
    pool.close()
    pool.join()
    out_queue.put(StopIteration)
Example #26
    def multi_proc3(self, batch):
        start_time = datetime.datetime.now()

        sql = "select count(id) from records"
        count_result = db_connection.execute(sql)

        for row in count_result:
            count = row[0]
            break

        sql = "select id from records"
        result = db_connection.execute(sql)

        record_ids = []
        for idx, row in enumerate(result):
            if (idx % int(batch) == 0) or (idx == count - 1):
                if idx == 0:
                    some_records = []
                else:
                    record_ids.append(some_records)
                    some_records = []
            some_records.append(row[0])


        #Add id messages to input queue
        msg_handler = MessageHandler()
        for records in record_ids:
            msg_handler.add_message(json.dumps({"ids":records}), "input_queue")

        worker_results = []
        p = Pool(4)
        for i in range(4):
            worker_results.append(p.apply_async(read_id_from_queue, ()))

        p.close()

        for r in worker_results:
            r.get()

        p.join() # This blocks until all the processes have finished

        end_time = datetime.datetime.now()
        time_taken = (end_time - start_time).total_seconds()

        return time_taken
Example #27
def run(config_uri, app_name=None, username=None, types=(), batch_size=500, processes=None):
    # multiprocessing.get_context is Python 3 only.
    from multiprocessing import get_context
    from multiprocessing.pool import Pool

    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('snovault').setLevel(logging.DEBUG)

    testapp = internal_app(config_uri, app_name, username)
    connection = testapp.app.registry[CONNECTION]
    uuids = [str(uuid) for uuid in connection.__iter__(*types)]
    transaction.abort()
    logger.info('Total items: %d' % len(uuids))

    pool = Pool(
        processes=processes,
        initializer=initializer,
        initargs=(config_uri, app_name, username),
        context=get_context('forkserver'),
    )

    all_results = []
    try:
        for result in pool.imap_unordered(worker, batched(uuids, batch_size), chunksize=1):
            results = result['results']
            errors = sum(error for item_type, path, update, error in results)
            updated = sum(update for item_type, path, update, error in results)
            logger.info('Batch: Updated %d of %d (errors %d)' %
                        (updated, len(results), errors))
            all_results.extend(results)
    finally:
        pool.terminate()
        pool.join()

    def result_item_type(result):
        # Ensure we always return a string
        return result[0] or ''

    for item_type, results in itertools.groupby(
            sorted(all_results, key=result_item_type), key=result_item_type):
        results = list(results)
        errors = sum(error for item_type, path, update, error in results)
        updated = sum(update for item_type, path, update, error in results)
        logger.info('Collection %s: Updated %d of %d (errors %d)' %
                    (item_type, updated, len(results), errors))
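Passing context=get_context('forkserver') as above selects the process start method for this pool only, rather than changing the global default. The more common spelling is to build the pool from the context itself; a minimal sketch (forkserver is Unix-only):

from multiprocessing import get_context

def work(x):
    return x + 1

if __name__ == "__main__":
    ctx = get_context("forkserver")  # workers start from a clean server process (Unix-only)
    with ctx.Pool(processes=2) as pool:
        print(pool.map(work, range(4)))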
Example #28
def multiprocess_all_chromosomes(func, cls, *args, **kwargs):
    '''
    Convenience method for splitting up queries based on tag id.
    '''
    processes = current_settings.ALLOWED_PROCESSES

    set_chromosome_lists(cls, use_table=kwargs.get('use_table', None))
    p = Pool(processes)

    try:
        for chr_list in current_settings.CHR_LISTS:
            p.apply_async(func, args=[cls, chr_list, ] + list(args))
        p.close()
        p.join()
    except Exception as e:
        print('Terminating pool.')
        p.terminate()
        raise e
Example #29
	def propagatePrediction(self):
		
		sortedTargets = sorted(self.targetToTermToScore.keys())
		
		inputs = [self.targetToTermToScore[targeti] for targeti in sortedTargets]

		global go
		go=self.go
		
		p = Pool(processes=10)
		results= p.map(makeCompletePrediction, inputs, chunksize=20)
		p.close()
		p.join()
		
		for i, result in enumerate(results):
			self.targetToTermToScore[sortedTargets[i]] = result
		
		return self
Example #30
    def multi_proc4(self, batch):
        start_time = datetime.datetime.now()

        sql = "select count(id) from records"
        count_result = db_connection.execute(sql)

        for row in count_result:
            count = row[0]
            break

        sql = "select id from records"
        result = db_connection.execute(sql)

        record_ids = []
        for idx, row in enumerate(result):
            if (idx % int(batch) == 0) or (idx == count - 1):
                if idx == 0:
                    some_records = []
                else:
                    record_ids.append(some_records)
                    some_records = []
            some_records.append(row[0])

        p = Pool(4)
        #Add id messages to input queue
        p.map(add_ids_to_queue, record_ids)

        #Read ids from input_queue, read message from DB and write it to output_queue
        worker_results = []
        p = Pool(4)
        for i in range(4):
            worker_results.append(p.apply_async(read_id_from_queue, ()))

        p.close()

        for r in worker_results:
            r.get()

        p.join() # This blocks until all the processes have finished

        end_time = datetime.datetime.now()
        time_taken = (end_time - start_time).total_seconds()

        return time_taken
Example #31
def capture(interface,database_output_file,redraw_frequency,arp_resolve,
        dns_resolve,sender_lists,target_lists,color_profile,
        output_columns,display_false,pcap_output_file,force_sender,
        *args,**kwargs):

    dbfile = database_output_file

    osigint = signal.signal(signal.SIGINT,signal.SIG_IGN)
    pool = Pool(3)
    signal.signal(signal.SIGINT, osigint)

    try:

        # ==============
        # START SNIFFING
        # ==============

        '''
        The sniffer is started in a distinct process because Scapy
        will block forever when scapy.all.sniff is called. This allows
        us to interrupt execution of the sniffer by terminating the
        process.

        TODO: It may be easier to use threading. Pool methods were fresh
        to me at the time of original development.
        '''


        ptable = None
        pcount = 0
        # Handle new database file. When verbose, alert user that a new
        # capture must occur prior to printing results.

        arp_resolution = ('disabled','enabled')[arp_resolve]
        dns_resolution = ('disabled','enabled')[dns_resolve]

        print('\x1b[2J\x1b[H\33[F')
        print(logo+'\n')
        print(f'Capture interface: {interface}')
        print(f'ARP resolution:    {arp_resolution}')
        print(f'DNS resolution:    {dns_resolution}')
        sess = create_db(dbfile)

        # ======================================
        # CREATE AN IP FOR THE CURRENT INTERFACE
        # ======================================


        iface_mac, iface_ips = get_interfaces()[interface]
        for ip in iface_ips:
            ip = get_or_create_ip(ip,
                sess,
                mac_address=iface_mac)

        if not Path(dbfile).exists():
            print('- Initializing capture\n- This may take time depending '\
                'on network traffic and filter configurations')
        else:

            print(f'Requests analyzed: {pcount}\n')
            ptable = get_output_table(
                sess,
                sender_lists=sender_lists,
                target_lists=target_lists,
                dns_resolve=dns_resolve,
                color_profile=color_profile,
                arp_resolve=arp_resolve,
                columns=output_columns,
                display_false=display_false,
                force_sender=force_sender)
            print(ptable)

        # Cache packets that will be written to output file
        pkts = []
        sniff_result = None
        arp_resolve_result, dns_resolve_result = None, None

        # Loop eternally
        while True:


            # Handle sniff results
            if sniff_result and sniff_result.ready():

                packets = sniff_result.get()
                sniff_result = None

                # Capture packets for the output file
                if pcap_output_file and packets: pkts += packets

                if packets: pcount += packets.__len__()

                # Clear the previous table from the screen using
                # ANSI escape sequences
                # https://stackoverflow.com/questions/5290994/remove-and-replace-printed-items/5291044#5291044
                if ptable:
                    lcount = ptable.split('\n').__len__()+2
                    stdout.write('\033[F\033[K'*lcount)

                ptable = get_output_table(
                    sess,
                    sender_lists=sender_lists,
                    target_lists=target_lists,
                    dns_resolve=dns_resolve,
                    color_profile=color_profile,
                    arp_resolve=arp_resolve,
                    columns=output_columns,
                    display_false=display_false,
                    force_sender=force_sender)

                print(f'Requests analyzed: {pcount}\n')
                print(ptable)

            # Do sniffing
            elif not sniff_result:


                sniff_result = pool.apply_async(
                    async_sniff,
                    (
                        interface,
                        redraw_frequency,
                        sender_lists,
                        target_lists,
                        database_output_file,
                    )
                )

            # ==================
            # DNS/ARP RESOLUTION
            # ==================

            # Do reverse resolution
            if dns_resolve:

                # Reset dns resolution results
                if not dns_resolve_result or dns_resolve_result.ready():

                    to_resolve = sess.query(IP) \
                            .filter(IP.reverse_dns_attempted != True) \
                            .count()

                    if to_resolve:

                        dns_resolve_result = pool.apply_async(
                            reverse_dns_resolve_ips,
                            (database_output_file,)
                        )

            # Do ARP resolution
            if arp_resolve:

                if not arp_resolve_result or arp_resolve_result.ready():

                    to_resolve = sess.query(IP) \
                            .filter(IP.arp_resolve_attempted != True) \
                            .count()

                    if to_resolve:

                        arp_resolve_result = pool.apply_async(
                            arp_resolve_ips,
                                (interface, database_output_file,)
                            )

            sleep(.2)


    except KeyboardInterrupt:

        print('\n- CTRL^C Caught...')
        sess.close()

    finally:

        # ===================
        # HANDLE OUTPUT FILES
        # ===================

        if pcap_output_file: wrpcap(pcap_output_file,pkts)

        # =====================
        # CLOSE CHILD PROCESSES
        # =====================

        try:

            pool.close()

            if sniff_result:
                print('- Waiting for the sniffer process...',end='')
                sniff_result.wait(5)
                print('done')

            if dns_resolve_result:
                print('- Waiting for the DNS resolver process...',end='')
                dns_resolve_result.wait(5)
                print('done')

            if arp_resolve_result:
                print('- Waiting for the ARP resolver process...',end='')
                arp_resolve_result.wait(5)
                print('done')

        except KeyboardInterrupt:

            pool.terminate()

        pool.join()
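The main loop above stays responsive by never blocking on the pool: it fires apply_async and then checks AsyncResult.ready() on every pass. Stripped of the capture logic, the pattern is roughly:

import time
from multiprocessing import Pool

def slow_capture(seconds):
    time.sleep(seconds)
    return "batch of packets"

if __name__ == "__main__":
    pool = Pool(1)
    result = pool.apply_async(slow_capture, (1,))
    while not result.ready():  # the main loop stays free to redraw, poll, etc.
        print("waiting for the sniffer...")
        time.sleep(0.2)
    print(result.get())
    pool.close()
    pool.join()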
Example #32
# If the pool is not yet full, a new process is created to run the request; otherwise the request waits until a process in the pool finishes, and only then is a new process created.

import os
import time
from multiprocessing.pool import Pool
from random import random


def task(task_name):
    print("Starting my new task....", task_name, os.getpid())
    starttime = time.time()
    time.sleep(random() * 3)
    endtime = time.time()
    #print("My task --{}-- is done... took {}, process {}".format(task_name, endtime-starttime, os.getpid()))
    return "My task --{}-- is done... took {}, process {}".format(task_name, endtime - starttime, os.getpid())


def callback_func(n):
    print(n)


if __name__ == "__main__":
    # process pool
    pool = Pool(5)
    tasks = ["listen to music", "eat", "play games", "watch the kids", "cook", "run", "study", "fight",
             "listen to music", "eat", "play games", "watch the kids", "cook", "run", "study", "fight"]
    for t in tasks:
        pool.apply_async(task, args=(t,), callback=callback_func)  # asynchronous, non-blocking
    pool.close()  # no more tasks may be submitted to the pool
    pool.join()  # block the main process until the workers finish
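Because the results here are only delivered through the callback, an exception raised inside task() would be dropped silently. A variant sketch that also wires up error_callback (supported by apply_async in Python 3):

from multiprocessing import Pool

def work(n):
    if n == 3:
        raise ValueError("bad input %d" % n)
    return n * n

def on_done(result):
    print("done:", result)

def on_error(exc):
    # without error_callback (or AsyncResult.get()) this exception would vanish
    print("failed:", exc)

if __name__ == "__main__":
    pool = Pool(4)
    for i in range(6):
        pool.apply_async(work, (i,), callback=on_done, error_callback=on_error)
    pool.close()
    pool.join()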
Example #33
class PyRAMmp():
    '''
    The PyRAMmp class sets up and runs a multiprocessing pool to enable
    parallel PyRAM model runs.
    '''
    def __init__(self, processes=None, maxtasksperchild=None):
        '''
        Initialise the pool and variable lists.
        processes and maxtasksperchild are passed to the pool.
        '''

        self.pool = Pool(processes=processes,
                         maxtasksperchild=maxtasksperchild)
        self.results = []  # Results from PyRAM.run()
        self._outputs = []  # New outputs from PyRAM.run() for transfer to self.results
        self._waiting = []  # Waiting runs
        self._num_waiting = 0  # Number of waiting runs
        self._num_active = 0  # Number of active runs
        self._sleep_time = 1e-2  # Minimum sleep time between adding runs to pool
        self._new = True  # Flag to indicate ready for new set of runs

    def submit_runs(self, runs):
        '''
        Submit new runs to the pool as resources become available
        runs is a list of PyRAM input tuples (args, kwargs)
        '''

        # Add to waiting list
        for run in runs:
            self._waiting.append(run)
        self._num_waiting = len(self._waiting)

        # Check how many active runs have finished
        for _ in range(len(self._outputs)):
            run = self._outputs.pop(0)
            self.results.append(run)
            self._num_active -= 1

        num_start = self.pool._processes - self._num_active
        num_start = min(num_start, self._num_waiting)

        # Start new runs if processes are free
        for _ in range(num_start):
            run = self._waiting.pop(0)
            self.pool.apply_async(run_pyram,
                                  args=(run, ),
                                  callback=self._get_output)
            self._num_active += 1

        if self._new:
            self._new = False
            self._wait()

    def _wait(self):
        '''
        Wait for all submitted runs to complete.
        '''

        while self._num_active > 0:
            self.submit_runs([])
            sleep(self._sleep_time)

        self._new = True

    def close(self):
        '''
        Close the pool and wait for all processes to finish.
        '''

        self.pool.close()
        self.pool.join()

    def _get_output(self, output):
        '''
        Get a PyRAM output.
        '''

        self._outputs.append(output)

    def __del__(self):

        self.close()
Example #34
        dftr = pd.DataFrame({'id': ids, 'train': 'train'})
        tdftr = pd.DataFrame({'id': ids, 'train': 'test'})
        train, test = DataProcess.train_test_between_subject(
            gdata, pd.concat((dftr, tdftr)),
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        DLogger.logger().debug("total points: " + str(get_total_pionts(train)))

        worker = GQL.get_instance(2, 1, {})
        train = DataProcess.merge_data(train)
        OptML.optimise(worker,
                       output_path,
                       train,
                       test,
                       global_iters=1000,
                       learning_rate=learning_rate)


if __name__ == '__main__':

    if len(sys.argv) == 2:
        n_proc = int(sys.argv[1])
    elif len(sys.argv) == 1:
        n_proc = 1
    else:
        raise Exception('invalid argument')

    p = Pool(n_proc)
    p.map(run_BD, range(len(configs)))
    p.close()  # no more tasks
    p.join()  # wrap up current tasks
Example #35
import time
from multiprocessing.pool import Pool

min_val = float('inf')
min_item = None


def update_min(item):
    global min_val, min_item

    print('outside if', min_val, min_item)
    if item[0] < min_val:
        print(f'updating min from {min_val} to {item[0]}')
        min_val = item[0]
        min_item = item[1]

    time.sleep(0.5)


if __name__ == '__main__':
    lst = [(4, 'a'), (2, 'b'), (1, 'c'), (0, 'd'), (3, 'f')]
    pool = Pool(processes=4)
    pool.map(update_min, lst)
    pool.close()
    pool.join()
    print(min_item, min_val)
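This example demonstrates a pitfall rather than a working reduction: each worker process updates its own copy of min_val and min_item, so the parent's final print still shows None inf. If a global minimum is actually wanted, reduce the mapped results in the parent instead, e.g.:

from multiprocessing import Pool

def identity(item):
    # workers simply hand the items back; the reduction happens in the parent
    return item

if __name__ == '__main__':
    lst = [(4, 'a'), (2, 'b'), (1, 'c'), (0, 'd'), (3, 'f')]
    with Pool(processes=4) as pool:
        results = pool.map(identity, lst)
    min_val, min_item = min(results)  # single-process reduction, no shared state needed
    print(min_item, min_val)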
Example #36
def ensemble(training_output_folder1,
             training_output_folder2,
             output_folder,
             task,
             validation_folder,
             folds,
             allow_ensembling: bool = True):
    print("\nEnsembling folders\n", training_output_folder1, "\n",
          training_output_folder2)

    output_folder_base = output_folder
    output_folder = join(output_folder_base, "ensembled_raw")

    # only_keep_largest_connected_component is the same for all stages
    dataset_directory = join(preprocessing_output_dir, task)
    plans = load_pickle(join(training_output_folder1,
                             "plans.pkl"))  # we need this only for the labels

    files1 = []
    files2 = []
    property_files = []
    out_files = []
    gt_segmentations = []

    folder_with_gt_segs = join(dataset_directory, "gt_segmentations")
    # in the correct shape and we need the original geometry to restore the niftis

    for f in folds:
        validation_folder_net1 = join(training_output_folder1, "fold_%d" % f,
                                      validation_folder)
        validation_folder_net2 = join(training_output_folder2, "fold_%d" % f,
                                      validation_folder)

        if not isdir(validation_folder_net1):
            raise AssertionError(
                "Validation directory missing: %s. Please rerun validation with `nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`"
                % validation_folder_net1)
        if not isdir(validation_folder_net2):
            raise AssertionError(
                "Validation directory missing: %s. Please rerun validation with `nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`"
                % validation_folder_net2)

        # we need to ensure the validation was successful. We can verify this via the presence of the summary.json file
        if not isfile(join(validation_folder_net1, 'summary.json')):
            raise AssertionError(
                "Validation directory incomplete: %s. Please rerun validation with `nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`"
                % validation_folder_net1)
        if not isfile(join(validation_folder_net2, 'summary.json')):
            raise AssertionError(
                "Validation directory missing: %s. Please rerun validation with `nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`"
                % validation_folder_net2)

        patient_identifiers1_npz = [
            i[:-4]
            for i in subfiles(validation_folder_net1, False, None, 'npz', True)
        ]
        patient_identifiers2_npz = [
            i[:-4]
            for i in subfiles(validation_folder_net2, False, None, 'npz', True)
        ]

        # we don't do postprocessing anymore so there should not be any of that noPostProcess
        patient_identifiers1_nii = [
            i[:-7] for i in subfiles(validation_folder_net1,
                                     False,
                                     None,
                                     suffix='nii.gz',
                                     sort=True)
            if not i.endswith("noPostProcess.nii.gz")
            and not i.endswith('_postprocessed.nii.gz')
        ]
        patient_identifiers2_nii = [
            i[:-7] for i in subfiles(validation_folder_net2,
                                     False,
                                     None,
                                     suffix='nii.gz',
                                     sort=True)
            if not i.endswith("noPostProcess.nii.gz")
            and not i.endswith('_postprocessed.nii.gz')
        ]

        if not all(
            [i in patient_identifiers1_npz for i in patient_identifiers1_nii]):
            raise AssertionError(
                "Missing npz files in folder %s. Please run the validation for all models and folds with the '--npz' flag."
                % (validation_folder_net1))
        if not all(
            [i in patient_identifiers2_npz for i in patient_identifiers2_nii]):
            raise AssertionError(
                "Missing npz files in folder %s. Please run the validation for all models and folds with the '--npz' flag."
                % (validation_folder_net2))

        patient_identifiers1_npz.sort()
        patient_identifiers2_npz.sort()

        assert all([
            i == j
            for i, j in zip(patient_identifiers1_npz, patient_identifiers2_npz)
        ]), "npz filenames do not match. This should not happen."

        maybe_mkdir_p(output_folder)

        for p in patient_identifiers1_npz:
            files1.append(join(validation_folder_net1, p + '.npz'))
            files2.append(join(validation_folder_net2, p + '.npz'))
            property_files.append(join(validation_folder_net1, p) + ".pkl")
            out_files.append(join(output_folder, p + ".nii.gz"))
            gt_segmentations.append(join(folder_with_gt_segs, p + ".nii.gz"))

    p = Pool(default_num_threads)
    p.map(merge, zip(files1, files2, property_files, out_files))
    p.close()
    p.join()

    if not isfile(join(output_folder, "summary.json")) and len(out_files) > 0:
        aggregate_scores(tuple(zip(out_files, gt_segmentations)),
                         labels=plans['all_classes'],
                         json_output_file=join(output_folder, "summary.json"),
                         json_task=task,
                         json_name=task + "__" +
                         output_folder_base.split("/")[-1],
                         num_threads=default_num_threads)

    if allow_ensembling and not isfile(
            join(output_folder_base, "postprocessing.json")):
        # now lets also look at postprocessing. We cannot just take what we determined in cross-validation and apply it
        # here because things may have changed and may also be too inconsistent between the two networks
        determine_postprocessing(output_folder_base,
                                 folder_with_gt_segs,
                                 "ensembled_raw",
                                 "temp",
                                 "ensembled_postprocessed",
                                 default_num_threads,
                                 dice_threshold=0)

        out_dir_all_json = join(network_training_output_dir, "summary_jsons")
        json_out = load_json(
            join(output_folder_base, "ensembled_postprocessed",
                 "summary.json"))

        json_out["experiment_name"] = output_folder_base.split("/")[-1]
        save_json(
            json_out,
            join(output_folder_base, "ensembled_postprocessed",
                 "summary.json"))

        maybe_mkdir_p(out_dir_all_json)
        shutil_sol.copyfile(
            join(output_folder_base, "ensembled_postprocessed",
                 "summary.json"),
            join(out_dir_all_json,
                 "%s__%s.json" % (task, output_folder_base.split("/")[-1])))
Example #37
    total = 0
    scores = {}
    commanders = [
        'examples.Greedy', 'examples.Balanced', 'examples.Random',
        'examples.Defender'
    ]
    maps = ['map00', 'map01', 'map10', 'map20']

    pairs = itertools.permutations(commanders, 2)
    games = list(itertools.product(maps, pairs))

    print "Running competition with %i commanders and %i maps, for a total of %i games.\n" % (
        len(commanders), len(maps), len(games))
    try:
        for map, results in p.map(run, games):
            for bot, score in results.items():
                scores.setdefault(bot, [0, 0])
                scores[bot][0] += score[0]
                scores[bot][1] += score[1]
            total += 1
    except KeyboardInterrupt:
        print "\nTerminating competition due to keyboard interrupt."
        p.terminate()
        p.join()
    else:
        print "\n%i total games run." % (total)
        for r, s in sorted(scores.items(), key=lambda i: -i[1][0] + i[1][1]):
            print "{}   for: {}, against: {}".format(
                r.replace('Commander', '').upper(), s[0], s[1])
        raw_input()
Example #38
	def __call__(self, model, max_iter, data_dir, fnames, D_config, model_save_dir=None, save_every_iter=1000, full_batch=False):
		"""
		model: LSTM/LSTM_CNN/BiLSTM class, model for training, 
			model should be initialized/loaded before passing in
		max_iter: max iterations for training
		data_dir: .npz file dir
		fnames: list of file names for training/testing
		D_config: data loader config
		model_save_dir: string, folder dir where to save all models
		save_every_iter: int, save the model into model_save_dir every save_every_iter iterations
		"""
		# auto save model according to class name
		self.model_class = model.__class__.__name__

		self.D_config = D_config

		target_files = self.data_dir2target_files(data_dir, fnames)
		if target_files is None:
			return None

		if self.num_file_in_mem >= len(target_files):
			# if all heater data can fit in memory
			self.D_config["free_mem"] = False
		else:
			self.D_config["free_mem"] = True

		file_idx = 0
		it = 0

		# process based thread pool
		# API: https://docs.python.org/3/library/multiprocessing.html#module-multiprocessing
		pool = Pool(self.num_threads)
		load_flag = False

		print("[INFO] Notice that we are using multiprocessing to load files, so that child processes won't print out on ipython-notebook, which only monitor the parent process. Please check the terminal for more logging info.")
		self.cur_file_in_mem = 0
		while True:
			if self.num_file_in_mem >= len(target_files):
				# if all data fits in memory, wait until all of it is ready;
				# otherwise we would run multiple times on the data that loaded first
				if not load_flag:
					for one_file in target_files:
						pool.apply_async(self._newthread_helper, args=(one_file,), \
							callback=self._callback_helper, error_callback=self._error_helper)
					pool.close()
					# wait till all loaded
					pool.join()

					# flag up, then we wont load data again
					load_flag = True
			else:
				one_file = target_files[file_idx]
				if self.cur_file_in_mem < min(self.num_file_in_mem, len(target_files)):
					# start a new process loading one_file
					pool.apply_async(self._newthread_helper, args=(one_file,), \
						callback=self._callback_helper, error_callback=self._error_helper)
					self.cur_file_in_mem += 1
					file_idx = (file_idx + 1) % len(target_files)
					if file_idx == 0:
						# shuffle target_files if we finish one round on all of them
						random.shuffle(target_files)
				else:
					time.sleep(0.001)

			if len(self.DataPool) > 0:
				# some process returned Heater_Data into self.DataPool
				self.train_main(model, model_save_dir, save_every_iter, pool, target_files, max_iter, full_batch)
			else:
				time.sleep(0.001)
		pool.close()
		pool.join()
		# reset DataPool for another training/testing
		self.DataPool.clear()
Example #39
    col1 = db1[website]
    col2 = db2[website]
    for i in col1.find():
        temp = dict()
        temp['url'] = i['url']
        temp['grabtime'] = i['grabtime']
        temp['website'] = i['website']
        temp['status'] = i['status']
        temp['pagetime'] = i['pagetime']
        col2.insert(temp)

    t_stop = time.time()
    print("执行完毕,耗时%0.2f" % (t_stop - t_start))


if __name__ == "__main__":

    po = Pool(10)

    for website in websites:
        po.apply_async(worker, (website, ))

    print("----start----")
    start = time.time()

    po.close()
    po.join()

    print("-----end-----")
    stop = time.time()
    print('Total time: %0.2f s' % (stop - start))
Example #40
 def star(self):
     process_pool = ProcessPool(processes=self.concurrency)
     process_pool.map(self.run, range(self.concurrency))
     process_pool.close()
     process_pool.join()
Example #41
    def validate(self,
                 do_mirroring: bool = True,
                 use_sliding_window: bool = True,
                 step_size: float = 0.5,
                 save_softmax: bool = True,
                 use_gaussian: bool = True,
                 overwrite: bool = True,
                 validation_folder_name: str = 'validation_raw',
                 debug: bool = False,
                 all_in_gpu: bool = False,
                 segmentation_export_kwargs: dict = None,
                 run_postprocessing_on_folds: bool = True):

        current_mode = self.network.training
        self.network.eval()

        assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
        if self.dataset_val is None:
            self.load_dataset()
            self.do_split()

        if segmentation_export_kwargs is None:
            if 'segmentation_export_params' in self.plans.keys():
                force_separate_z = self.plans['segmentation_export_params'][
                    'force_separate_z']
                interpolation_order = self.plans['segmentation_export_params'][
                    'interpolation_order']
                interpolation_order_z = self.plans[
                    'segmentation_export_params']['interpolation_order_z']
            else:
                force_separate_z = None
                interpolation_order = 1
                interpolation_order_z = 0
        else:
            force_separate_z = segmentation_export_kwargs['force_separate_z']
            interpolation_order = segmentation_export_kwargs[
                'interpolation_order']
            interpolation_order_z = segmentation_export_kwargs[
                'interpolation_order_z']

        output_folder = join(self.output_folder, validation_folder_name)
        maybe_mkdir_p(output_folder)

        if do_mirroring:
            mirror_axes = self.data_aug_params['mirror_axes']
        else:
            mirror_axes = ()

        pred_gt_tuples = []

        export_pool = Pool(2)
        results = []

        transpose_backward = self.plans.get('transpose_backward')

        for k in self.dataset_val.keys():
            properties = load_pickle(self.dataset[k]['properties_file'])
            data = np.load(self.dataset[k]['data_file'])['data']

            # concat segmentation of previous step
            seg_from_prev_stage = np.load(
                join(self.folder_with_segs_from_prev_stage,
                     k + "_segFromPrevStage.npz"))['data'][None]

            print(data.shape)
            data[-1][data[-1] == -1] = 0
            data_for_net = np.concatenate(
                (data[:-1],
                 to_one_hot(seg_from_prev_stage[0], range(1,
                                                          self.num_classes))))

            softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
                data_for_net,
                do_mirroring=do_mirroring,
                mirror_axes=mirror_axes,
                use_sliding_window=use_sliding_window,
                step_size=step_size,
                use_gaussian=use_gaussian,
                all_in_gpu=all_in_gpu,
                mixed_precision=self.fp16)[1]

            if transpose_backward is not None:
                transpose_backward = self.plans.get('transpose_backward')
                softmax_pred = softmax_pred.transpose(
                    [0] + [i + 1 for i in transpose_backward])

            fname = Path(properties['list_of_data_files'][0]).parts[-1][:-12]

            if save_softmax:
                softmax_fname = join(output_folder, fname + ".npz")
            else:
                softmax_fname = None
            """There is a problem with python process communication that prevents us from communicating obejcts 
            larger than 2 GB between processes (basically when the length of the pickle string that will be sent is 
            communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long 
            enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually 
            patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will 
            then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either 
            filename or np.ndarray and will handle this automatically"""
            if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be safe
                np.save(fname + ".npy", softmax_pred)
                softmax_pred = fname + ".npy"

            results.append(
                export_pool.starmap_async(
                    save_segmentation_nifti_from_softmax,
                    ((softmax_pred, join(output_folder, fname + ".nii.gz"),
                      properties, interpolation_order,
                      self.regions_class_order, None, None, softmax_fname,
                      None, force_separate_z, interpolation_order_z), )))

            pred_gt_tuples.append([
                join(output_folder, fname + ".nii.gz"),
                join(self.gt_niftis_folder, fname + ".nii.gz")
            ])

        _ = [i.get() for i in results]

        task = Path(self.dataset_directory).parts[-1]
        job_name = self.experiment_name
        _ = aggregate_scores(pred_gt_tuples,
                             labels=list(range(self.num_classes)),
                             json_output_file=join(output_folder,
                                                   "summary.json"),
                             json_name=job_name,
                             json_author="Fabian",
                             json_description="",
                             json_task=task)

        if run_postprocessing_on_folds:
            # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
            # except the largest connected component for each class. To see if this improves results, we do this for all
            # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
            # have this applied during inference as well
            self.print_to_log_file("determining postprocessing")
            determine_postprocessing(self.output_folder,
                                     self.gt_niftis_folder,
                                     validation_folder_name,
                                     final_subf_name=validation_folder_name +
                                     "_postprocessed",
                                     debug=debug)
            # after this the final predictions for the validation set can be found in validation_folder_name_base + "_postprocessed"
            # They are always in that folder, even if no postprocessing was applied!

        # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds another
        # postprocessing to be better? In this case we need to consolidate. At the time the consolidation is going to be
        # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to
        # be used later
        gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
        maybe_mkdir_p(gt_nifti_folder)
        for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
            success = False
            attempts = 0
            while not success and attempts < 10:
                try:
                    shutil.copy(f, gt_nifti_folder)
                    success = True
                except OSError:
                    attempts += 1
                    sleep(1)

        self.network.train(current_mode)
        export_pool.close()
        export_pool.join()
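
The docstring in the loop above sidesteps the ~2 GB pickling limit of multiprocessing by writing large softmax arrays to a .npy file and passing the filename to the export process. A minimal sketch of that pattern, independent of nnU-Net (export_worker and the size threshold are illustrative assumptions), looks like this:

# Hedged sketch of the "pass a filename instead of a huge array" workaround;
# export_worker and the threshold are illustrative, not nnU-Net code.
import os
import numpy as np
from multiprocessing.pool import Pool

def export_worker(softmax_or_path, out_file):
    # accept either an in-memory array or a path to a temporary .npy file
    if isinstance(softmax_or_path, str):
        softmax = np.load(softmax_or_path)
        os.remove(softmax_or_path)          # clean up the temporary file
    else:
        softmax = softmax_or_path
    np.save(out_file, softmax.argmax(0))    # stand-in for the real export step

if __name__ == "__main__":
    pool = Pool(2)
    softmax = np.random.rand(4, 64, 64, 64).astype(np.float32)
    if softmax.nbytes > 2e9 * 0.85:         # very large arrays go through disk
        np.save("tmp_softmax.npy", softmax)
        softmax = "tmp_softmax.npy"
    res = pool.starmap_async(export_worker, ((softmax, "pred.npy"),))
    res.get()
    pool.close()
    pool.join()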
Ejemplo n.º 42
0
 def join(self):
     Pool.join(self)
     for r in self.int_results:
         # return values were already handled in the callbacks, but asking
         # for them might raise exceptions which would otherwise be lost
         self.results.append(r.get())
Ejemplo n.º 43
0
def parallel_process(func, dataset):
    pool = Pool(4)
    result = [pool.apply_async(func, data) for data in dataset]
    pool.close()
    pool.join()
    return [_result.get() for _result in result]
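
A small usage sketch of parallel_process as defined above; note that apply_async(func, data) treats each dataset element as a tuple of positional arguments, and on platforms that spawn workers the function must be defined at module level. The add function and the dataset below are made up for illustration.

# Hedged usage sketch for parallel_process; `add` and the dataset are illustrative.
def add(a, b):
    return a + b

if __name__ == "__main__":
    dataset = [(1, 2), (3, 4), (5, 6)]        # each element is an args tuple
    print(parallel_process(add, dataset))     # -> [3, 7, 11]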
Ejemplo n.º 44
0
def read_files_batched(filenames,
                       file_batch_size=8192,
                       file_batch_shuffle=False,
                       max_batches=math.inf,
                       return_mode='array',
                       n_jobs=-1,
                       max_batches_in_queue=1000,
                       max_queue_wait_seconds=0.5,
                       pd_kwargs={}):
    """Read multiple files in parallel."""
    def listify_generator(func, *args, **kwargs):
        listified_generator = list(func(*args, **kwargs))
        return (listified_generator)

    if n_jobs == -1:
        n_jobs = cpu_count() - 1
        n_jobs = min((n_jobs, len(filenames)))

    # Parallel
    if n_jobs > 1:

        # Batch queue, appended in callback
        batch_queue = deque(maxlen=max_batches_in_queue)

        def callback(batch):
            while True:
                if len(batch_queue) < max_batches_in_queue:
                    batch_queue.append(batch)
                    break
                else:
                    time.sleep(0.1)

        # Create processes
        p = Pool(n_jobs)
        for filename in filenames:
            p.apply_async(listify_generator, (read_file_batched, filename),
                          dict(file_batch_size=file_batch_size,
                               file_batch_shuffle=file_batch_shuffle,
                               max_batches=max_batches,
                               return_mode=return_mode,
                               pd_kwargs=pd_kwargs),
                          callback=callback)

        # Yield from queue
        keep_trying = True
        last_non_empty_batch = None
        while keep_trying:
            if len(batch_queue) > 0:
                for batch in batch_queue.popleft():
                    yield batch
                last_non_empty_batch = time.perf_counter()

            if len(batch_queue) == 0:
                if last_non_empty_batch is not None:
                    if time.perf_counter() - last_non_empty_batch >= max_queue_wait_seconds:
                        keep_trying = False
        p.close()
        p.join()

    # Single process
    else:
        for filename in filenames:
            for batch in read_file_batched(
                    filename,
                    file_batch_size=file_batch_size,
                    file_batch_shuffle=file_batch_shuffle,
                    max_batches=max_batches,
                    return_mode=return_mode,
                    pd_kwargs=pd_kwargs):
                yield batch
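
The parallel branch above pushes finished chunks from the apply_async callback into a deque and yields them in the parent process. A stripped-down, self-contained sketch of that callback-plus-queue pattern (load_chunk stands in for reading one file) is:

# Hedged sketch of the callback-plus-queue pattern used above.
from collections import deque
from multiprocessing.pool import Pool
import time

def load_chunk(i):
    # stand-in for reading one file; returns a small list of rows
    return [i * 10 + k for k in range(3)]

def iter_chunks(n_chunks, n_jobs=2):
    queue = deque()
    pool = Pool(n_jobs)
    for i in range(n_chunks):
        pool.apply_async(load_chunk, (i,), callback=queue.append)
    pool.close()
    done = 0
    while done < n_chunks:
        if queue:
            yield queue.popleft()   # hand a finished chunk to the caller
            done += 1
        else:
            time.sleep(0.01)        # wait for workers to produce more
    pool.join()

if __name__ == "__main__":
    for chunk in iter_chunks(5):
        print(chunk)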
Ejemplo n.º 45
0
def peak__partition(v,
                    s1,
                    s2,
                    find_maxima=True,
                    partition_op=None,
                    multiprocessing_process_num=0):
    """
    partition the volume then detect peaks for each partition
    note that this will result in redundant peaks!!
    Clean up must be done afterwards!!
    """
    import aitom.image.vol.partition as IVP

    if multiprocessing_process_num > 0:
        pool = Pool(processes=min(multiprocessing_process_num,
                                  multiprocessing.cpu_count()))
    else:
        pool = None

    if partition_op is None:
        # in this case, just generate a single partition
        siz_max = max(v.shape)
        partition_op = {
            'nonoverlap_width': siz_max * 2,
            'overlap_width': siz_max * 2
        }

    b = IVP.gen_bases(v.shape,
                      nonoverlap_width=partition_op['nonoverlap_width'],
                      overlap_width=partition_op['overlap_width'])
    print('partition num', b.shape)

    ps = []

    if pool is not None:
        pool_re = []
        for i0 in range(b.shape[0]):
            for i1 in range(b.shape[1]):
                for i2 in range(b.shape[2]):
                    bp = N.squeeze(b[i0, i1, i2, :, :])
                    pool_re.append(
                        pool.apply_async(
                            func=peak__partition__single_job,
                            kwds={
                                'v':
                                v[bp[0, 0]:bp[0, 1], bp[1, 0]:bp[1, 1],
                                  bp[2, 0]:bp[2, 1]],
                                's1':
                                s1,
                                's2':
                                s2,
                                'base':
                                bp,
                                'find_maxima':
                                find_maxima,
                                'partition_id': (i0, i1, i2),
                                'save_vg':
                                (partition_op['save_vg']
                                 if 'save_vg' in partition_op else False)
                            }))

        for pool_re_t in pool_re:
            ppsj = pool_re_t.get(9999999)
            ps.extend(ppsj['ps'])
            print('\r', ppsj['partition_id'], '                     ')
            sys.stdout.flush()

        pool.close()
        pool.join()
        del pool

    else:

        for i0 in range(b.shape[0]):
            for i1 in range(b.shape[1]):
                for i2 in range(b.shape[2]):
                    bp = N.squeeze(b[i0, i1, i2, :, :])
                    ppsj = peak__partition__single_job(
                        v=v[bp[0, 0]:bp[0, 1], bp[1, 0]:bp[1, 1],
                            bp[2, 0]:bp[2, 1]],
                        s1=s1,
                        s2=s2,
                        base=bp,
                        find_maxima=find_maxima,
                        partition_id=(i0, i1, i2),
                        save_vg=(partition_op['save_vg']
                                 if 'save_vg' in partition_op else False))
                    ps.extend(ppsj['ps'])
                    print('\r', ppsj['partition_id'], '                     ')
                    sys.stdout.flush()

    # order peaks in ps according to values
    if find_maxima:
        ps = sorted(ps, key=lambda _: (-_['val']))
    else:
        ps = sorted(ps, key=lambda _: _['val'])

    return ps
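
The docstring warns that overlapping partitions produce redundant peaks that must be cleaned up afterwards. A generic de-duplication sketch follows; it assumes each peak dict carries its location under an 'x' key, which is an illustrative field name rather than the actual return format of peak__partition__single_job.

# Hedged sketch of removing duplicate peaks from overlapping partitions.
# Assumes each peak is a dict with a location under 'x' and a score under 'val';
# the 'x' key name is an assumption for illustration.
import numpy as np

def dedup_peaks(ps, min_dist=3.0):
    kept = []
    for p in ps:                                   # ps is already sorted by score
        loc = np.asarray(p['x'], dtype=float)
        if all(np.linalg.norm(loc - np.asarray(q['x'], dtype=float)) >= min_dist
               for q in kept):
            kept.append(p)
    return kept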
Ejemplo n.º 46
0
def main(argv):

    print('python {:s} {:s}'.format(' '.join(sys.argv),
                                    str(datetime.now())[:20]))

    if RUN_TEST_ENH == 1:

        runscripts(TEST_ENH, SAMPLE_ID, TEST_PATH,\
        SPECIES, ANALYZE_AGE, ANALYZE_BREAKS)

    if ANALYZE_SHUFFLE == 1:

        # create pool and run simulations in parallel

        shuffle_id = "shuf-" + (SAMPLE_ID).split("_enhancers")[0]
        shuffle_path = os.path.join(TEST_PATH, SAMPLE_ID, "shuffle")

        if os.path.exists(shuffle_path) == False:
            os.mkdir(shuffle_path)

        test_enh_formatted = preformatBedfile(
            TEST_ENH, SAMPLE_ID,
            TEST_PATH)  # format the enhancer bed file and sort

        pool = Pool(NUM_THREADS)
        partial_calcExp = partial(calculateExpected,\
                                      test_enh_formatted, SAMPLE_ID,\
                                      shuffle_path, SPECIES)

        exp_sum_list = pool.map(partial_calcExp,
                                [i for i in range(ITERATIONS)])
        pool.close()
        pool.join()

        if os.path.exists(shuffle_path) == False:
            os.mkdir(shuffle_path)

        if "enh_ages.bed" in TEST_ENH:

            print("SHUFFLE_ID", shuffle_id)
            runscripts(TEST_ENH, shuffle_id, TEST_PATH,\
            SPECIES, ANALYZE_AGE, ANALYZE_BREAKS)

        elif "enh_ages.bed" not in TEST_ENH:

            shuf_fs = glob.glob(f"{shuffle_path}/{shuffle_id}*.bed"
                                )  # get all the shuffle files

            val = 0

            for shuf_f in shuf_fs:  # age each shuffle file.

                iter_id = shuffle_id + "-" + str(val)
                print("iter_id", iter_id)
                runscripts(shuf_f, iter_id, shuffle_path,\
                SPECIES, ANALYZE_AGE, ANALYZE_BREAKS)

                val += 1
        else:
            print("sarah, address these problems with shuffle not running")

        rm_cmd = f"rm {TEST_PATH}/cut-*{SAMPLE_ID}*.bed"
        os.system(rm_cmd)
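
The shuffle branch above freezes the leading arguments of calculateExpected with functools.partial so that pool.map only has to vary the iteration index. A minimal, self-contained sketch of that partial-plus-map pattern (simulate and its arguments are illustrative):

# Hedged sketch of the functools.partial + Pool.map pattern used above.
from functools import partial
from multiprocessing.pool import Pool

def simulate(bed_file, sample_id, iteration):
    return "%s-%s-%d" % (bed_file, sample_id, iteration)

if __name__ == "__main__":
    partial_sim = partial(simulate, "test.bed", "sample01")  # fix the leading args
    with Pool(4) as pool:
        results = pool.map(partial_sim, range(10))           # vary only the index
    print(results[:3])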
Ejemplo n.º 47
0
def concurrency_run(num: int):
    pool = Pool(num)
    for k in range(num):
        pool.apply_async(func=run)
    pool.close()
    pool.join()
Ejemplo n.º 48
0
"""
# author Liu shi hao
# date: 2019/12/11 15:43
# file_name: process_pool_test
进程池
"""
import os
import time
from multiprocessing.pool import Pool


# the task each process should run
def task():
    for i in range(3):
        print(os.getpid(), i)
        time.sleep(0.2)


if __name__ == '__main__':

    pool1 = Pool(3)
    for i in range(15):
        # pool1.apply_async(task)  # asynchronous
        pool1.apply(task)  # synchronous

    pool1.close()
    pool1.join()
    print('finish')
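
Note that pool1.apply above blocks until each task finishes, so the 15 tasks run one after another despite the pool; apply_async returns an AsyncResult immediately and lets the workers run in parallel. A small sketch of the asynchronous variant:

# Hedged sketch contrasting apply (synchronous) with apply_async (asynchronous).
import os
import time
from multiprocessing.pool import Pool

def task():
    time.sleep(0.2)
    return os.getpid()

if __name__ == '__main__':
    with Pool(3) as pool:
        results = [pool.apply_async(task) for _ in range(15)]  # submit without blocking
        pids = [r.get() for r in results]                      # block only when collecting
    print('finished on', len(set(pids)), 'worker processes')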
Ejemplo n.º 49
0
    def evaluate_csv_right(self):
        """
        评估CSV文件
        """
        # in_file_name = 'test_400_right'     # test set of 400
        # in_file_name = 'test_1000_right'    # test set of 1000
        # in_file_name = 'random_1w_urls'    # test set of 10k
        # in_file = os.path.join(DATA_DIR, 'test_urls_files', in_file_name + ".csv")

        # in_file_name = "sanghu.zj_question_cut_sampled_jueying_url_5k_1229"  # full-page scans
        # in_file_name = "dump_write_pure.out"  # pure handwriting
        # in_file_name = "7_train_ori.out"  # full-page queries
        # in_file_name = "HW_TRAIN.out"
        # in_file_name = "biaozhu_fix.check"
        # in_file_name = "biaozhu_csv_out"
        # in_file_name = "random_1w_urls"  # ordinary queries
        # in_file_name = "zjw_url"  # small images
        # in_file_name = "xiaotu_labeled_25w_165512"  # small images
        in_file_name = "zjw_imgs_20210427_urls"  # small images

        in_file = os.path.join(DATA_DIR, 'page_dataset_files',
                               in_file_name + ".txt")  # input file

        print('[Info] in_file: {}'.format(in_file))

        data_lines = read_file(in_file)
        print('[Info] total samples: {}'.format(len(data_lines)))
        if len(data_lines) == 0:
            print('[Info] bad file path: {}'.format(in_file))
            return

        # test file
        n = 10000
        if len(data_lines) > n:
            random.seed(47)
            # random.seed(89)
            random.shuffle(data_lines)  # shuffle randomly
            data_lines = data_lines[:n]

        print('[Info] sample count: {}'.format(len(data_lines)))

        # test output file
        time_str = get_current_time_str()
        out_name = 'check_{}.{}.csv'.format(in_file_name, time_str)
        out_dir = os.path.join(DATA_DIR, "check_dir_20210329")
        mkdir_if_not_exist(out_dir)
        out_file = os.path.join(out_dir, out_name)

        # filter files
        # out_dir = os.path.join(DATA_DIR, "xiaotu_dir")
        # in_file_name = '{}_good.txt'.format(in_file_name)
        # mkdir_if_not_exist(out_dir)
        # out_file = os.path.join(out_dir, in_file_name)

        # write_dir = os.path.join(out_dir, 'write_dir_{}'.format(time_str))
        # mkdir_if_not_exist(write_dir)
        write_dir = None

        pool = Pool(processes=100)
        for idx, data_line in enumerate(data_lines):
            # option 1
            # if idx == 0:
            #     continue
            # url, r_angle = data_line.split(',')

            # option 2
            url, r_angle = data_line, 0

            # name = url.split('/')[-1].split('.')[0]
            # file_name_x = in_file_name.split('.')[0]
            # url = "https://sm-transfer.oss-cn-hangzhou.aliyuncs.com/zhengsheng.wcl/problems_rotation/" \
            #       "datasets/{}_x/{}.jpg".format(file_name_x, name)

            try:
                pool.apply_async(OnlineEvaluation.process_thread_right,
                                 (idx, url, r_angle, out_file, write_dir))
                # OnlineEvaluation.process_thread_right(idx, url, r_angle, out_file, write_dir)

                # filter images
                # pool.apply_async(OnlineEvaluation.process_save_img_url, (idx, url, r_angle, out_file, write_dir))
                # OnlineEvaluation.process_save_img_url(idx, url, r_angle, out_file, write_dir)
            except Exception as e:
                print('[Info] Error URL: {}'.format(url))
                continue
            # print('[Info] URL: {}'.format(url))
            # OnlineEvaluation.process_thread_right(idx, url, r_angle, out_file, write_dir)

        pool.close()
        pool.join()

        print('[Info] wrote file: {}'.format(out_file))
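
One caveat in the loop above: the try/except around pool.apply_async cannot catch errors raised inside process_thread_right, because apply_async returns immediately and worker exceptions only surface from AsyncResult.get() or through error_callback. A minimal sketch of the error_callback approach (process and on_error are illustrative):

# Hedged sketch: surfacing worker exceptions from apply_async via error_callback.
from multiprocessing.pool import Pool

def process(idx, url):
    if not url.startswith("http"):
        raise ValueError("bad url: %s" % url)
    return idx

def on_error(exc):
    print("[Info] worker failed:", exc)

if __name__ == "__main__":
    urls = ["http://a.example", "not-a-url", "http://b.example"]
    pool = Pool(2)
    for idx, url in enumerate(urls):
        pool.apply_async(process, (idx, url), error_callback=on_error)
    pool.close()
    pool.join()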
Ejemplo n.º 50
0
class ProcessPoolStrategy(ParallelStrategy, _PoolRunnableStrategy,
                          _Resultable):

    _Processors_Pool: Pool = None
    _Processors_List: List[Union[ApplyResult, AsyncResult]] = None

    def __init__(self, pool_size: int):
        super().__init__(pool_size=pool_size)

    def initialization(self,
                       queue_tasks: Optional[Union[_BaseQueueTask,
                                                   _BaseList]] = None,
                       features: Optional[Union[_BaseFeatureAdapterFactory,
                                                _BaseList]] = None,
                       *args,
                       **kwargs) -> None:

        super(ProcessPoolStrategy,
              self).initialization(queue_tasks=queue_tasks,
                                   features=features,
                                   *args,
                                   **kwargs)

        # Activate multiprocessing.managers.BaseManager server
        activate_manager_server()

        # Initialize and build the Processes Pool.
        __pool_initializer: Callable = kwargs.get("pool_initializer", None)
        __pool_initargs: IterableType = kwargs.get("pool_initargs", None)
        self._Processors_Pool = Pool(processes=self.pool_size,
                                     initializer=__pool_initializer,
                                     initargs=__pool_initargs)

    def apply(self,
              tasks_size: int,
              function: Callable,
              args: Tuple = (),
              kwargs: Dict = {}) -> None:
        self.reset_result()
        __process_running_result = None

        self._Processors_List = [
            self._Processors_Pool.apply(func=function, args=args, kwds=kwargs)
            for _ in range(tasks_size)
        ]

        for _p in self._Processors_List:
            try:
                __process_running_result = _p
                __exception = None
                __process_run_successful = True
            except Exception as e:
                __exception = e
                __process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=__process_run_successful,
                                result=__process_running_result,
                                exception=__exception)

    def async_apply(self,
                    tasks_size: int,
                    function: Callable,
                    args: Tuple = (),
                    kwargs: Dict = {},
                    callback: Callable = None,
                    error_callback: Callable = None) -> None:

        self.reset_result()
        self._Processors_List = [
            self._Processors_Pool.apply_async(func=function,
                                              args=args,
                                              kwds=kwargs,
                                              callback=callback,
                                              error_callback=error_callback)
            for _ in range(tasks_size)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_process_run_successful,
                                result=_process_running_result,
                                exception=_exception)

    def apply_with_iter(self,
                        functions_iter: List[Callable],
                        args_iter: List[Tuple] = None,
                        kwargs_iter: List[Dict] = None) -> None:
        self.reset_result()
        __process_running_result = None

        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        self._Processors_List = [
            self._Processors_Pool.apply(func=_func, args=_args, kwds=_kwargs)
            for _func, _args, _kwargs in zip(functions_iter, args_iter,
                                             kwargs_iter)
        ]

        for process in self._Processors_List:
            try:
                __process_running_result = process
                __exception = None
                __process_run_successful = True
            except Exception as e:
                __exception = e
                __process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=__process_run_successful,
                                result=__process_running_result,
                                exception=__exception)

    def async_apply_with_iter(
            self,
            functions_iter: List[Callable],
            args_iter: List[Tuple] = None,
            kwargs_iter: List[Dict] = None,
            callback_iter: List[Callable] = None,
            error_callback_iter: List[Callable] = None) -> None:

        self.reset_result()

        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        if callback_iter is None:
            callback_iter = [None for _ in functions_iter]

        if error_callback_iter is None:
            error_callback_iter = [None for _ in functions_iter]

        self._Processors_List = [
            self._Processors_Pool.apply_async(func=_func,
                                              args=_args,
                                              kwds=_kwargs,
                                              callback=_callback,
                                              error_callback=_error_callback)
            for _func, _args, _kwargs, _callback, _error_callback in zip(
                functions_iter, args_iter, kwargs_iter, callback_iter,
                error_callback_iter)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_process_run_successful,
                                result=_process_running_result,
                                exception=_exception)

    def map(self,
            function: Callable,
            args_iter: IterableType = (),
            chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.map(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def async_map(self,
                  function: Callable,
                  args_iter: IterableType = (),
                  chunksize: int = None,
                  callback: Callable = None,
                  error_callback: Callable = None) -> None:

        self.reset_result()

        _process_running_result = None
        _exception = None

        _map_result = self._Processors_Pool.map_async(
            func=function,
            iterable=args_iter,
            chunksize=chunksize,
            callback=callback,
            error_callback=error_callback)

        try:
            _process_running_result = _map_result.get()
            _process_run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def map_by_args(self,
                    function: Callable,
                    args_iter: IterableType[IterableType] = (),
                    chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.starmap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def async_map_by_args(self,
                          function: Callable,
                          args_iter: IterableType[IterableType] = (),
                          chunksize: int = None,
                          callback: Callable = None,
                          error_callback: Callable = None) -> None:

        self.reset_result()
        _map_result = self._Processors_Pool.starmap_async(
            func=function,
            iterable=args_iter,
            chunksize=chunksize,
            callback=callback,
            error_callback=error_callback)
        _process_running_result = _map_result.get()
        _process_run_successful = _map_result.successful()

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=None)

    def imap(self,
             function: Callable,
             args_iter: IterableType = (),
             chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = [
                result for result in imap_running_result
            ]
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def imap_unordered(self,
                       function: Callable,
                       args_iter: IterableType = (),
                       chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap_unordered(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = [
                result for result in imap_running_result
            ]
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def _result_saving(self, successful: bool, result: List,
                       exception: Exception) -> None:
        _process_result = {
            "successful": successful,
            "result": result,
            "exception": exception
        }
        self._Processors_Running_Result.append(_process_result)

    def close(self) -> None:
        self._Processors_Pool.close()
        self._Processors_Pool.join()

    def terminal(self) -> None:
        self._Processors_Pool.terminate()

    def get_result(self) -> List[_ProcessPoolResult]:
        return self.result()

    def _saving_process(self) -> List[_ProcessPoolResult]:
        _pool_results = []
        for __result in self._Processors_Running_Result:
            _pool_result = _ProcessPoolResult()
            _pool_result.is_successful = __result["successful"]
            _pool_result.data = __result["result"]
            _pool_result.exception = __result["exception"]
            _pool_results.append(_pool_result)
        return _pool_results
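
The class above is a thin wrapper over Pool.apply/apply_async, map, starmap, imap and imap_unordered. A compact, self-contained sketch of the raw Pool calls it delegates to (square and add are illustrative):

# Hedged sketch of the Pool primitives wrapped by ProcessPoolStrategy.
from multiprocessing.pool import Pool

def square(x):
    return x * x

def add(a, b):
    return a + b

if __name__ == "__main__":
    pool = Pool(4)
    print(pool.apply(square, (3,)))                      # blocking, single call
    print(pool.apply_async(square, (4,)).get())          # AsyncResult, collect later
    print(pool.map(square, range(5)))                    # one argument per item
    print(pool.starmap(add, [(1, 2), (3, 4)]))           # argument tuples
    print(list(pool.imap_unordered(square, range(5))))   # lazy, arbitrary order
    pool.close()
    pool.join()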
Ejemplo n.º 51
0
def evaluate_regions(folder_predicted: str,
                     folder_gt: str,
                     regions: dict,
                     processes=default_num_threads):
    region_names = list(regions.keys())
    files_in_pred = subfiles(folder_predicted, suffix='.nii.gz', join=False)
    files_in_gt = subfiles(folder_gt, suffix='.nii.gz', join=False)
    have_no_gt = [i for i in files_in_pred if i not in files_in_gt]
    assert len(
        have_no_gt
    ) == 0, "Some files in folder_predicted have no ground truth in folder_gt"
    have_no_pred = [i for i in files_in_gt if i not in files_in_pred]
    if len(have_no_pred) > 0:
        print(
            "WARNING! Some files in folder_gt were not predicted (not present in folder_predicted)!"
        )

    files_in_gt.sort()
    files_in_pred.sort()

    # run for all cases
    full_filenames_gt = [folder_gt + "/" + i for i in files_in_pred]
    full_filenames_pred = [folder_predicted + "/" + i for i in files_in_pred]

    p = Pool(processes)
    res = p.starmap(
        evaluate_case,
        zip(full_filenames_pred, full_filenames_gt,
            [list(regions.values())] * len(files_in_gt)))
    p.close()
    p.join()

    all_results = {r: [] for r in region_names}
    with open(folder_predicted + "/" + 'summary.csv', 'w') as f:
        f.write("casename")
        for r in region_names:
            f.write(",%s" % r)
        f.write("\n")
        for i in range(len(files_in_pred)):
            f.write(files_in_pred[i][:-7])
            result_here = res[i]
            for k, r in enumerate(region_names):
                dc = result_here[k]
                f.write(",%02.4f" % dc)
                all_results[r].append(dc)
            f.write("\n")

        f.write('mean')
        for r in region_names:
            f.write(",%02.4f" % np.nanmean(all_results[r]))
        f.write("\n")
        f.write('median')
        for r in region_names:
            f.write(",%02.4f" % np.nanmedian(all_results[r]))
        f.write("\n")

        f.write('mean (nan is 1)')
        for r in region_names:
            tmp = np.array(all_results[r])
            tmp[np.isnan(tmp)] = 1
            f.write(",%02.4f" % np.mean(tmp))
        f.write("\n")
        f.write('median (nan is 1)')
        for r in region_names:
            tmp = np.array(all_results[r])
            tmp[np.isnan(tmp)] = 1
            f.write(",%02.4f" % np.median(tmp))
        f.write("\n")
Ejemplo n.º 52
0
    def fit(self,
            X,
            y,
            mask,
            subset=None,
            num_workers=1,
            queue_len=2,
            chunk_size=10000):
        """Fit the model according to the given training data.
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vector, where n_samples in the number of samples and
            n_features is the number of features.
        y : array-like, shape = [n_samples]
            Target vector relative to X
        mask : array-like, shape = [n_samples]
            Control group mask vector relative to X, 
            where True (1) - control, False (0) - test
        sample_weight : array-like, shape = [n_samples], optional
            Array of weights that are assigned to individual
            samples. If not provided,
            then each sample is given unit weight.
        Returns
        -------
        self : object
        """
        try:
            self.X_shape = X.shape
            assert (len(self.X_shape) == 2)
            self.num_samples = self.X_shape[0]
            self.num_features = self.X_shape[1]

            if self.verbose == 1:
                print('parenclitic_graphs')
                sys.stdout.flush()

            if subset is None:
                self.partition.fit(self.num_features)
            else:
                self.partition.fit(subset)

            if not self.pair_filter is None:
                self.pair_filter.fit(X, mask, self.partition)
                self.pairs = self.pair_filter
            else:
                self.pairs = self.partition

            global num_done, num_pairs
            num_done = 0
            num_pairs = len(self.pairs)
            each_progress = int(np.sqrt(num_pairs + 0.5))
            if self.progress_bar:
                from tqdm import tqdm
                progress_bar = tqdm(total=num_pairs)
            #global fit
            #globals()['X'] = X
            #globals()['y'] = y
            #globals()['mask'] = mask
            fit = self.kernel.fit
            M, D, E = [], [], []

            need_parallel = num_workers > 1
            my_parallel_calc = parallel_calc(self.verbose)
            if need_parallel:
                global done_tasks, ready
                pool = Pool(num_workers,
                            initializer=my_parallel_calc.init,
                            initargs=(X, y, mask, self.kernel))
                done_tasks = 0
                ready = Semaphore(num_workers * queue_len)
            else:
                my_parallel_calc.init(X, y, mask, self.kernel)

            def upd_graph(res):
                global num_done, done_tasks, ready
                if self.verbose == 1:
                    print('upd_graphs')
                    sys.stdout.flush()
                if not type(res) is int:
                    for cur in res:
                        if not cur is None:
                            m, d, i, j = cur
                            #m, d = res.get_edges()
                            if m.any():
                                M.append(m)
                                D.append(d)
                                E.append([i, j])
                    res = len(res)

                if need_parallel:
                    done_tasks += 1
                    ready.release()

                if self.progress_bar:
                    progress_bar.set_description('Number of edges: %i' %
                                                 len(M),
                                                 refresh=False)
                    progress_bar.update(res)

                num_done += 1
                if num_done % each_progress == 0 or num_done == num_pairs:
                    stop = timeit.default_timer()
                    if self.verbose == 1:
                        print('Graph for', num_done, 'pairs calculated in',
                              stop - start)
                        sys.stdout.flush()

            if self.verbose == 1:
                print('start iterate')
                sys.stdout.flush()
            start = timeit.default_timer()
            num_tasks = 0
            for ids in chunked_iterable(self.pairs, chunk_size):
                #if not self.pair_filter is None:
                #    if self.pair_filter.is_filtered(i, j):
                #        continue

                num_tasks += 1
                if need_parallel:
                    '''
                    if self.verbose == 1:
                        print('acquire')
                        sys.stdout.flush()
                    '''
                    ready.acquire()
                    '''
                    if self.verbose == 1:
                        print('apply_async')
                        sys.stdout.flush()
                    '''
                    #pool.apply_async(self.kernel.fit, args = (X[:, i], X[:, j], y, mask), callback = upd_graph)

                    #pool.apply_async(my_parallel_calc.calc_links, args = (i, j), callback = upd_graph) #

                    pool.apply_async(my_parallel_calc.calc_batch,
                                     args=(np.array(ids), ),
                                     callback=upd_graph)  #
                    pass
                else:
                    if self.verbose == 1:
                        print('calc batch')
                        sys.stdout.flush()
                    #upd_graph(self.kernel.fit(X[:, i], X[:, j], y, mask))
                    #upd_graph(len(ids))
                    #upd_graph(my_parallel_calc.calc_links(i, j))

                    upd_graph(my_parallel_calc.calc_batch(np.array(ids)))
                    pass

            if need_parallel:
                while done_tasks < num_tasks:
                    ready.acquire()

                pool.close()
                pool.join()

            if self.verbose == 1:
                print('ready done')
                sys.stdout.flush()

            if self.progress_bar:
                progress_bar.close()
            if M == []:
                self.M = np.zeros((self.num_samples, 0), dtype=bool)
                self.D = np.zeros((self.num_samples, 0), dtype=np.float32)
                self.E = np.zeros((0, 2), dtype=np.float32)
            else:
                self.M = np.array(M).T
                self.D = np.array(D).T
                self.E = np.array(E)
            self.is_fitted = True
        except:
            if self.progress_bar:
                progress_bar.close()
            raise
        return self
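
The parallel branch above bounds the number of in-flight apply_async tasks with a Semaphore: the submitting loop acquires a slot before each submission and the callback releases it when a batch finishes, so at most num_workers * queue_len chunks are queued at any time. A self-contained sketch of that throttling pattern:

# Hedged sketch of throttling apply_async submissions with a Semaphore,
# mirroring the ready.acquire()/ready.release() pattern above.
from multiprocessing.pool import Pool
from threading import Semaphore

def work(chunk):
    return sum(chunk)

if __name__ == "__main__":
    num_workers, queue_len = 4, 2
    ready = Semaphore(num_workers * queue_len)   # max tasks in flight
    results = []

    def on_done(res):
        results.append(res)
        ready.release()                          # free one submission slot

    pool = Pool(num_workers)
    for chunk in ([i, i + 1] for i in range(20)):
        ready.acquire()                          # block if too many tasks queued
        pool.apply_async(work, (chunk,), callback=on_done)
    pool.close()
    pool.join()
    print(len(results), "chunks done")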
Ejemplo n.º 53
0
    except:
        print('Failed to save image')
        
# define the main function: take an offset and download via the process pool
from multiprocessing.pool import Pool

def main(offset):
    json=get_page_jrtt(offset)
    for item in get_images_jrtt(json):
        save_image_jrtt(item)

# start and end pages
start=0
end=20

if __name__=='__main__':
    print('Downloading images, please wait..')
    pool=Pool()
    groups=([x*20 for x in range(start,end)])
    pool.map(main,groups)  # map main over the page offsets using the pool
    pool.close()
    pool.join()   # close() must come before join(), otherwise an error is raised; after close() no new tasks enter the pool, and join() waits for all child processes to finish
    print('Image download finished.')







Ejemplo n.º 54
0
    local_image_url = item.get('image')
    new_image_url = local_image_url.replace('list', 'large')
    r = requests.get('http:' + new_image_url)
    if r.status_code == 200:
        file_path = img_path + os.path.sep + '{0}.{1}'.format(
            md5(r.content).hexdigest(), 'jpg')
        if not os.path.exists(file_path):
            with open(file_path, 'wb') as f:
                f.write(r.content)


def saveToMongo(item):
    if db[MONGO_TABLE].insert(item):
        print('Saved to MongoDB successfully', item)
    return False


def main(offset):
    json = getPage(offset)
    for item in getImage(json):
        saveImage(item)
        saveToMongo(item)


if __name__ == '__main__':
    pool = Pool()
    groups = [x * 20 for x in range(2)]  # crawl two pages
    pool.map(main, groups)
    pool.close()  # close the pool so it no longer accepts new tasks
    pool.join()  # the main process blocks until the child processes exit
Ejemplo n.º 55
0
def _execute_sub_tasks(task_id, params, sig_content, verbosity, runmode,
                       sigmode, monitor_interval, resource_monitor_interval):
    '''If this is a master task, execute as individual tasks'''
    m = ProcessMonitor(
        task_id,
        monitor_interval=monitor_interval,
        resource_monitor_interval=resource_monitor_interval,
        max_walltime=params.sos_dict['_runtime'].get('max_walltime', None),
        max_mem=params.sos_dict['_runtime'].get('max_mem', None),
        max_procs=params.sos_dict['_runtime'].get('max_procs', None),
        sos_dict=params.sos_dict)
    m.start()

    env.logger.info(f'{task_id} ``started``')

    master_out = os.path.join(os.path.expanduser('~'), '.sos', 'tasks',
                              task_id + '.out')
    master_err = os.path.join(os.path.expanduser('~'), '.sos', 'tasks',
                              task_id + '.err')
    # if this is a master task, calling each sub task
    with open(master_out, 'wb') as out, open(master_err, 'wb') as err:

        def copy_out_and_err(result):
            tid = result['task']
            out.write(
                f'{tid}: {"completed" if result["ret_code"] == 0 else "failed"}\n'
                .encode())
            if 'output' in result:
                out.write(f'output: {result["output"]}\n'.encode())
            sub_out = os.path.join(os.path.expanduser('~'), '.sos', 'tasks',
                                   tid + '.out')
            if os.path.isfile(sub_out):
                with open(sub_out, 'rb') as sout:
                    out.write(sout.read())
                try:
                    os.remove(sub_out)
                except Exception as e:
                    env.logger.warning(f'Failed to remove {sub_out}: {e}')

            sub_err = os.path.join(os.path.expanduser('~'), '.sos', 'tasks',
                                   tid + '.err')
            if 'exception' in result:
                err.write(str(result['exception']).encode())
            err.write(
                f'{tid}: {"completed" if result["ret_code"] == 0 else "failed"}\n'
                .encode())
            if os.path.isfile(sub_err):
                with open(sub_err, 'rb') as serr:
                    err.write(serr.read())
                try:
                    os.remove(sub_err)
                except Exception as e:
                    env.logger.warning(f'Failed to remove {sub_err}: {e}')

            # remove other files as well
            try:
                remove_task_files(tid, ['.out', '.err'])
            except Exception as e:
                env.logger.debug(f'Failed to remove files {tid}: {e}')

        if params.num_workers > 1:
            from multiprocessing.pool import Pool
            p = Pool(params.num_workers)
            results = []
            for t in params.task_stack:
                results.append(
                    p.apply_async(_execute_task,
                                  ((*t, {
                                      t[0]: sig_content.get(t[0], {})
                                  }), verbosity, runmode, sigmode, None, None),
                                  callback=copy_out_and_err))
            for idx, r in enumerate(results):
                results[idx] = r.get()
            p.close()
            p.join()
            # we wait for all results to be ready to return or raise
            # but we only raise exception for one of the subtasks
            # for res in results:
            #     if 'exception' in res:
            #         failed = [x.get("task", "")
            #                   for x in results if "exception" in x]
            #         env.logger.error(
            #             f'{task_id} ``failed`` due to failure of subtask{"s" if len(failed) > 1 else ""} {", ".join(failed)}')
            #         return {'ret_code': 1, 'exception': res['exception'], 'task': task_id}
        else:
            results = []
            for tid, tdef in params.task_stack:
                # no monitor process for subtasks
                res = _execute_task((tid, tdef, {
                    tid: sig_content.get(tid, {})
                }),
                                    verbosity=verbosity,
                                    runmode=runmode,
                                    sigmode=sigmode,
                                    monitor_interval=None,
                                    resource_monitor_interval=None)
                try:
                    copy_out_and_err(res)
                except Exception as e:
                    env.logger.warning(
                        f'Failed to copy result of subtask {tid}: {e}')
                results.append(res)
            # for res in results:
            #     if 'exception' in res:
            #         failed = [x.get("task", "")
            #                   for x in results if "exception" in x]
            #         env.logger.error(
            #             f'{task_id} ``failed`` due to failure of subtask{"s" if len(failed) > 1 else ""} {", ".join(failed)}')
            #         return {'ret_code': 1, 'exception': res['exception'], 'task': task_id}
    #
    # now we collect result
    all_res = {
        'ret_code': 0,
        'output': None,
        'subtasks': {},
        'shared': {},
        'skipped': 0,
        'signature': {}
    }
    for tid, x in zip(params.task_stack, results):
        all_res['subtasks'][tid[0]] = x

        if 'exception' in x:
            all_res['exception'] = x['exception']
            all_res['ret_code'] += 1
            continue
        all_res['ret_code'] += x['ret_code']
        if all_res['output'] is None:
            all_res['output'] = x['output']
        else:
            try:
                all_res['output'].extend(x['output'], keep_groups=True)
            except Exception as e:
                env.logger.warning(
                    f"Failed to extend output {all_res['output']} with {x['output']}"
                )
        all_res['shared'].update(x['shared'])
        # does not care if one or all subtasks are executed or skipped.
        all_res['skipped'] += x.get('skipped', 0)
        if 'signature' in x:
            all_res['signature'].update(x['signature'])

    if all_res['ret_code'] != 0:
        if all_res['ret_code'] == len(results):
            env.logger.info(
                f'All {len(results)} tasks in {task_id} ``failed``')
        else:
            env.logger.info(
                f'{all_res["ret_code"]} of {len(results)} tasks in {task_id} ``failed``'
            )
        # if some failed, some skipped, not skipped
        if 'skipped' in all_res:
            all_res.pop('skipped')
    elif all_res['skipped']:
        if all_res['skipped'] == len(results):
            env.logger.info(
                f'All {len(results)} tasks in {task_id} ``ignored`` or skipped'
            )
        else:
            # if only partial skip, we still save signature and result etc
            env.logger.info(
                f'{all_res["skipped"]} of {len(results)} tasks in {task_id} ``ignored`` or skipped'
            )
            all_res.pop('skipped')
    else:
        env.logger.info(f'All {len(results)} tasks in {task_id} ``completed``')
    return all_res
Ejemplo n.º 56
0
def authorate(arguments):
    """Main function which delegates to fabric tasks."""
    global engine
    engine = create_engine('sqlite:///' + arguments['--db'])
    create_db(engine)

    global VERBOSE
    VERBOSE = arguments['--verbose']
    multi_thread = not arguments['--one']

    if arguments['-C']:
        classify.classifiers_dir = arguments['-C']

    # Assume successful return value
    ret = 0
    if arguments['load']:

        # Load in words and word counts from file
        session = get_session(engine)
        if len(session.query(Word_Count).all()) == 0:
            subprocess.call('sqlite3 ' + arguments['--db'] +
                            ' < import_words.sql',
                            shell=True)

        prefix = arguments['--prefix']
        if os.path.exists(prefix):
            # Determine how many snippets to get per path.
            snippets_count = arguments['<snippets-per-path>']
            if not snippets_count:
                snippets_count = DEFAULT_SNIPPETS_COUNT

            pool = Pool(cpu_count() if multi_thread else 1)
            with open(arguments['<paths-file>'], 'r') as paths_file:
                paths = paths_file.readlines()
                for path in paths:
                    res = load_path(pool,
                                    path.rstrip(),
                                    prefix=prefix,
                                    multi_thread=multi_thread)
                    if not res:
                        ret = 3
            # Join the pool
            pool.close()
            pool.join()
        else:
            display_error(
                "The given prefix does not exist: {path}".format(path=prefix))
            ret = 2

    elif arguments['process']:
        # Cleanup the classifier dir
        classify.clean_classifier_dir()

        # Get and scale data from snippets
        session = get_session(engine)
        snippets = session.query(Book, Snippet).join(Snippet).all()
        data = [text_to_vector(snip.text, session) for _, snip in snippets]
        scaler = classify.create_and_save_scaler(data)
        scaled_data = scaler.transform(data)
        targets = [book.path_id for book, _ in snippets]

        # Train the classifiers
        for (Cls, kwargs) in classify.classifier_types:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                classifier = Cls(**kwargs)
                classifier.fit(scaled_data, targets)
            classify.save_classifier(classifier)

    elif arguments['classify']:
        snip_file = arguments['<snippet-file>']
        input_files = [snip_file if snip_file else '-']
        classify.classify_all(
            engine, " ".join([
                unicode(line.rstrip(), errors='ignore')
                for line in fileinput.input(input_files)
            ]))

    elif arguments['test']:
        session = get_session(engine)
        snippets = session.query(Book, Snippet).join(Snippet).all()
        if VERBOSE:
            print("Converting raw data to vectors. . .")
        data = [text_to_vector(snip.text, session) for _, snip in snippets]
        targets = [book.path_id for book, _ in snippets]
        classify.test_all(engine, data, targets)

    else:
        display_error("No subcommand given.")
        ret = 1
    return ret
Ejemplo n.º 57
0
def pos_type_classify(bamfile,
                      chrom,
                      start,
                      end,
                      is_single,
                      read_length,
                      temp_dir,
                      extension=None,
                      center=True,
                      maxsize=None,
                      process=20,
                      minmapq=0,
                      is_multmapfilter=False):
    print bamfile, chrom, start, end, is_single, read_length, temp_dir, extension, center
    if is_single:
        total_reads_type6_left = [
        ]  # 6. in left place of del and second read is on the breakpoint
        total_reads_type6_right = [
        ]  # 6. in right place of del and first read is on the breakpoint
        total_reads_type7 = []  # 7. reads within the del
        # temp_prefix = "%s/classify_%s" % (temp_dir, sub_num)
        if extension:
            rel_start = start - extension
            rel_end = end + extension
        else:
            rel_start = start
            rel_end = end
        if center:
            reads_type6_left, reads_type6_right, reads_type7, filtered_reads_num = posType_sub_single(
                bamfile, chrom, rel_start, rel_end, start, end, minmapq,
                is_multmapfilter)
        else:
            rel_start_left = rel_start
            rel_end_left = start + maxsize
            rel_start_right = end - maxsize
            rel_end_right = rel_end
            reads_type6_left_1, reads_type6_right_1, reads_type7_1, filtered_reads_num_1 = posType_sub_single(
                bamfile, chrom, rel_start_left, rel_end_left, start, end,
                minmapq, is_multmapfilter)
            reads_type6_left_2, reads_type6_right_2, reads_type7_2, filtered_reads_num_2 = posType_sub_single(
                bamfile, chrom, rel_start_right, rel_end_right, start, end,
                minmapq, is_multmapfilter)
            reads_type6_left = reads_type6_left_1 + reads_type6_right_1
            reads_type6_right = reads_type6_right_1 + reads_type6_right_2
            reads_type7 = reads_type7_1 + reads_type7_2
            filtered_reads_num = filtered_reads_num_1 + filtered_reads_num_2

        total_reads_type6_left.extend(reads_type6_left)
        total_reads_type6_right.extend(reads_type6_right)
        total_reads_type7.extend(reads_type7)
        total_filtered_reads = filtered_reads_num
        print total_reads_type6_left, total_reads_type6_right, total_reads_type7, total_filtered_reads
        return total_reads_type6_left, total_reads_type6_right, total_reads_type7, total_filtered_reads
    else:
        total_reads_type1_left = [
        ]  # 1. in left place of del and second read is on the breakpoint
        total_reads_type1_right = [
        ]  # 1. in right place of del and first read is on the breakpoint
        total_reads_type2_left = [
        ]  # 2. in left place of del and first read is on the breakpoint
        total_reads_type2_right = [
        ]  # 2. in right place of del and second read is on the breakpoint
        total_reads_type3_left = [
        ]  # 3. in left place of del and first read and right read is crossover breakpoint with no intersection
        total_reads_type3_right = [
        ]  # 3. in right place of del and first read and right read is crossover breakpoint with no intersection
        total_reads_type4 = []  # 4. reads within the del
        total_reads_type5_left = [
        ]  # 5. in left place of del and first read and right read are all has intersection
        total_reads_type5_right = [
        ]  # 3. in right place of del and first read and right read are all has intersection
        total_filtered_reads = 0

        length = end - start + 1
        sub_num = length / read_length

        # when start = end, translocation of chromosome
        if start == end:
            rel_start = start - maxsize
            rel_end = end + maxsize
            print rel_start, rel_end
            # temp_prefix = "%s/classify_%s" % (temp_dir, "whole")
            (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right, reads_type3_left,
             reads_type3_right, reads_type4, reads_type5_left, reads_type5_right, filtered_reads_num) \
                = posType_sub_paired(bamfile, chrom, rel_start, rel_end, start, end, read_length, minmapq,
                                     is_multmapfilter, extension=extension)
            total_reads_type1_left.extend(reads_type1_left)
            total_reads_type1_right.extend(reads_type1_right)
            total_reads_type2_left.extend(reads_type2_left)
            total_reads_type2_right.extend(reads_type2_right)
            total_reads_type3_left.extend(reads_type3_left)
            total_reads_type3_right.extend(reads_type3_right)
            total_reads_type4.extend(reads_type4)
            total_reads_type5_left.extend(reads_type5_left)
            total_reads_type5_right.extend(reads_type5_right)
            total_filtered_reads = filtered_reads_num

        # end - start < read_length, and the range does not need to be extended
        elif sub_num == 0 and not extension:
            # temp_prefix = "%s/classify_%s" % (temp_dir, sub_num)
            (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right, reads_type3_left,
             reads_type3_right, reads_type4, reads_type5_left, reads_type5_right, filtered_reads_num) \
                = posType_sub_paired(bamfile, chrom, start, end, start, end, read_length, minmapq, is_multmapfilter,
                                     extension=extension)
            total_reads_type1_left.extend(reads_type1_left)
            total_reads_type1_right.extend(reads_type1_right)
            total_reads_type2_left.extend(reads_type2_left)
            total_reads_type2_right.extend(reads_type2_right)
            total_reads_type3_left.extend(reads_type3_left)
            total_reads_type3_right.extend(reads_type3_right)
            total_reads_type4.extend(reads_type4)
            total_reads_type5_left.extend(reads_type5_left)
            total_reads_type5_right.extend(reads_type5_right)
            total_filtered_reads = filtered_reads_num

        # the region is large enough to split across multiple worker processes
        else:
            run_pool = Pool(process)
            result_list = []
            # extend the range to cover whole reads
            if extension:
                rel_start = start - extension
                rel_end = end + extension
                length = rel_end - rel_start + 1
                sub_num = length / read_length
            else:
                rel_start = start
                rel_end = end
            # process the whole region when the center matters, or when it does
            # not but the region is too small to split into separate flanks
            if center or (maxsize is not None and length < maxsize * 2):
                for i in range(sub_num):
                    sub_start = i * read_length + rel_start
                    if i == sub_num - 1:
                        sub_end = rel_end
                    else:
                        sub_end = sub_start + read_length - 1  # each window spans read_length bases
                    print "Sub Process: %s" % i, sub_start, sub_end
                    result_list.append(
                        run_pool.apply_async(
                            posType_sub_paired,
                            args=(bamfile, chrom, sub_start, sub_end, start,
                                  end, read_length, minmapq, is_multmapfilter,
                                  extension)))
                run_pool.close()
                run_pool.join()
            # the center does not need to be considered: process only the left and right flanks
            else:
                rel_start_left = rel_start
                rel_end_left = start + maxsize
                rel_start_right = end - maxsize
                rel_end_right = rel_end
                # print rel_start_left, rel_end_left, rel_start_right, rel_end_right
                length = rel_end_left - rel_start_left + 1
                sub_num = length / read_length
                for i in range(sub_num):
                    sub_start = i * read_length + rel_start_left
                    if i == sub_num - 1:
                        sub_end = rel_end_left
                    else:
                        sub_end = sub_start + read_length - 1  # each window spans read_length bases
                    print "Sub Process: %s" % i, sub_start, sub_end
                    # temp_prefix = "%s/classify_%s" % (temp_dir, i)
                    result_list.append(
                        run_pool.apply_async(
                            posType_sub_paired,
                            args=(bamfile, chrom, sub_start, sub_end, start,
                                  end, read_length, minmapq, is_multmapfilter,
                                  extension)))
                length = rel_end_right - rel_start_right + 1
                sub_num = length / read_length
                for i in range(sub_num):
                    sub_start = i * read_length + rel_start_right
                    if i == sub_num - 1:
                        sub_end = rel_end_right
                    else:
                        sub_end = sub_start + read_length - 1  # each window spans read_length bases
                    print "Sub Process: %s" % i, sub_start, sub_end
                    # temp_prefix = "%s/classify_%s" % (temp_dir, i)
                    result_list.append(
                        run_pool.apply_async(
                            posType_sub_paired,
                            args=(bamfile, chrom, sub_start, sub_end, start,
                                  end, read_length, minmapq, is_multmapfilter,
                                  extension)))
                run_pool.close()
                run_pool.join()

            for res in result_list:
                (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
                 reads_type3_left, reads_type3_right, reads_type4, reads_type5_left,
                 reads_type5_right, filtered_reads_num) = res.get()
                total_reads_type1_left.extend(reads_type1_left)
                total_reads_type1_right.extend(reads_type1_right)
                total_reads_type2_left.extend(reads_type2_left)
                total_reads_type2_right.extend(reads_type2_right)
                total_reads_type3_left.extend(reads_type3_left)
                total_reads_type3_right.extend(reads_type3_right)
                total_reads_type4.extend(reads_type4)
                total_reads_type5_left.extend(reads_type5_left)
                total_reads_type5_right.extend(reads_type5_right)
                total_filtered_reads += filtered_reads_num

        print "type1_left: %s; type1_right: %s, type2_left: %s; type2_right: %s, type3_left: %s; " \
              "type3_right: %s, type4: %s; type5_left: %s; type5_right: %s" % (
                  len(total_reads_type1_left), len(total_reads_type1_right), len(total_reads_type2_left),
                  len(total_reads_type2_right), len(total_reads_type3_left), len(total_reads_type3_right),
                  len(total_reads_type4), len(total_reads_type5_left), len(total_reads_type5_right))
        print "total_filtered_reads: %s" % total_filtered_reads
        return total_reads_type1_left, total_reads_type1_right, total_reads_type2_left, total_reads_type2_right, total_reads_type3_left, total_reads_type3_right, total_reads_type4, total_reads_type5_left, total_reads_type5_right, total_filtered_reads
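
A minimal, self-contained sketch of the chunk-and-dispatch pattern used above, assuming a hypothetical count_in_window worker in place of posType_sub_paired and a plain list of positions in place of the BAM file: the region is cut into read_length-sized windows, each window is submitted with apply_async, and the per-window results are merged after close()/join().

from multiprocessing import Pool


def count_in_window(positions, win_start, win_end):
    # Hypothetical stand-in for posType_sub_paired: count positions in [win_start, win_end].
    return sum(win_start <= p <= win_end for p in positions)


def classify_region(positions, start, end, read_length=100, processes=4):
    # Cut [start, end] into read_length-sized windows; the last window absorbs the remainder.
    sub_num = max((end - start + 1) // read_length, 1)
    run_pool = Pool(processes)
    result_list = []
    for i in range(sub_num):
        sub_start = start + i * read_length
        sub_end = end if i == sub_num - 1 else sub_start + read_length - 1
        result_list.append(run_pool.apply_async(count_in_window,
                                                args=(positions, sub_start, sub_end)))
    run_pool.close()
    run_pool.join()
    # Merge per-window results, mirroring the res.get() loop above.
    return sum(res.get() for res in result_list)


if __name__ == "__main__":
    positions = list(range(0, 1000, 7))
    print(classify_region(positions, 0, 999, read_length=250, processes=2))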
Ejemplo n.º 58
0
def main(args):
    """Do it all."""
    if not os.path.isdir(args.logs):
        raise Fail("Logs location '%s' is not a directory." % args.logs)

    builds = gather_builds(args)
    if args.verbose:
        print("Lined up %d builds." % len(builds))

    # The "configure" step is single-threaded.  We can run many at the same
    # time, even when we're also running a "build" step at the same time.
    # This means we may run a lot more processes than we have CPUs, but there's
    # no law against that.  There's also I/O time to be covered.
    configure_pool = Pool()

    # Builds which have failed the "configure" stage, with their errors.  This
    # queue must never stall, so that we can let results pile up here while the
    # work continues.
    configure_fails = Queue(len(builds))

    # Waiting list for the "build" stage.  It contains Build objects,
    # terminated by a final None to signify that there are no more builds to be
    # done.
    build_queue = JoinableQueue(10)

    # Builds that have failed the "build" stage.
    build_fails = Queue(len(builds))

    # Waiting list for the "test" stage.  It contains Build objects, terminated
    # by a final None.
    test_queue = JoinableQueue(10)

    # The "build" step tries to utilise all CPUs, and it may use a fair bit of
    # memory.  Run only one of these at a time, in a single worker process.
    build_worker = Process(
        target=service_builds, args=(build_queue, build_fails, test_queue))
    build_worker.start()

    # Builds that have failed the "test" stage.
    test_fails = Queue(len(builds))

    # Completed builds.  This must never stall.
    done_queue = JoinableQueue(len(builds))

    # The "test" step can not run concurrently (yet).  So, run tests serially
    # in a single worker process.  It takes its jobs directly from the "build"
    # worker.
    test_worker = Process(
        target=service_tests, args=(test_queue, test_fails, done_queue))
    test_worker.start()

    # Feed all builds into the "configure" pool.  Each build which passes this
    # stage goes into the "build" queue.
    for build in builds:
        configure_pool.apply_async(
            build.do_configure, callback=partial(enqueue, build_queue, build),
            error_callback=partial(enqueue_error, configure_fails, build))
    if args.verbose:
        print("All jobs are underway.")
    configure_pool.close()
    configure_pool.join()

    # TODO: Async reporting for faster feedback.
    configure_fail_count = report_failures(configure_fails, "CONFIGURE FAIL")
    if args.verbose:
        print("Configure stage done.")

    # Mark the end of the build queue for the build worker.
    build_queue.put(None)

    build_worker.join()
    # TODO: Async reporting for faster feedback.
    build_fail_count = report_failures(build_fails, "BUILD FAIL")
    if args.verbose:
        print("Build step done.")

    # Mark the end of the test queue for the test worker.
    test_queue.put(None)

    test_worker.join()
    # TODO: Async reporting for faster feedback.
    test_fail_count = report_failures(test_fails, "TEST FAIL")
    if args.verbose:
        print("Test step done.")

    # All done.  Clean up.
    for build in builds:
        build.clean_up()

    ok_count = count_entries(done_queue)
    if ok_count == len(builds):
        print("All tests OK.")
    else:
        print(
            "Failures during configure: %d - build: %d - test: %d.  OK: %d."
            % (
                configure_fail_count,
                build_fail_count,
                test_fail_count,
                ok_count,
            ))
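
A reduced sketch of the same three-stage layout, using hypothetical configure/service_stage stand-ins for do_configure and the build/test workers: a Pool runs the parallel stage, its success callback feeds a JoinableQueue, and a single worker Process drains that queue until it sees the None end marker.

from functools import partial
from multiprocessing import JoinableQueue, Pool, Process, Queue


def configure(name):
    # Hypothetical stand-in for Build.do_configure: pretend every build configures cleanly.
    return name


def enqueue(queue, item, _result):
    # Success callback (runs in the parent): hand the build to the next stage.
    queue.put(item)


def enqueue_error(queue, item, error):
    # Error callback: record the failure without stalling the pipeline.
    queue.put((item, error))


def service_stage(in_queue, done_queue):
    # Single worker process: drain a None-terminated JoinableQueue.
    while True:
        item = in_queue.get()
        in_queue.task_done()
        if item is None:
            break
        done_queue.put(item)


if __name__ == "__main__":
    builds = ["a", "b", "c"]
    configure_fails = Queue(len(builds))   # failures pile up here without blocking
    build_queue = JoinableQueue(10)        # waiting list for the next stage
    done_queue = Queue(len(builds))

    worker = Process(target=service_stage, args=(build_queue, done_queue))
    worker.start()

    pool = Pool()
    for b in builds:
        pool.apply_async(configure, (b,),
                         callback=partial(enqueue, build_queue, b),
                         error_callback=partial(enqueue_error, configure_fails, b))
    pool.close()
    pool.join()

    build_queue.put(None)   # end-of-queue marker, as in main() above
    worker.join()
    print(sorted(done_queue.get() for _ in builds))

The None sentinel lets the single consumer exit cleanly without polling or timeouts, which is why main() above puts one onto each queue once the preceding stage has finished.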
Ejemplo n.º 59
0
def aggregate_scores(test_ref_pairs,
                     evaluator=NiftiEvaluator,
                     labels=None,
                     nanmean=True,
                     json_output_file=None,
                     json_name="",
                     json_description="",
                     json_author="Fabian",
                     json_task="",
                     num_threads=2,
                     **metric_kwargs):
    """
    test = predicted image
    :param test_ref_pairs:
    :param evaluator:
    :param labels: must be a dict of int-> str or a list of int
    :param nanmean:
    :param json_output_file:
    :param json_name:
    :param json_description:
    :param json_author:
    :param json_task:
    :param metric_kwargs:
    :return:
    """

    if isinstance(evaluator, type):
        # a class was passed in rather than an instance; instantiate it
        evaluator = evaluator()

    if labels is not None:
        evaluator.set_labels(labels)

    all_scores = OrderedDict()
    all_scores["all"] = []
    all_scores["mean"] = OrderedDict()

    test = [i[0] for i in test_ref_pairs]
    ref = [i[1] for i in test_ref_pairs]
    p = Pool(num_threads)
    all_res = p.map(
        run_evaluation,
        zip(test, ref, [evaluator] * len(ref), [metric_kwargs] * len(ref)))
    p.close()
    p.join()

    for i in range(len(all_res)):
        all_scores["all"].append(all_res[i])

        # append score list for mean
        for label, score_dict in all_res[i].items():
            if label in ("test", "reference"):
                continue
            if label not in all_scores["mean"]:
                all_scores["mean"][label] = OrderedDict()
            for score, value in score_dict.items():
                if score not in all_scores["mean"][label]:
                    all_scores["mean"][label][score] = []
                all_scores["mean"][label][score].append(value)

    for label in all_scores["mean"]:
        for score in all_scores["mean"][label]:
            if nanmean:
                all_scores["mean"][label][score] = float(
                    np.nanmean(all_scores["mean"][label][score]))
            else:
                all_scores["mean"][label][score] = float(
                    np.mean(all_scores["mean"][label][score]))

    # save to file if desired
    # we create a hopefully unique id by hashing the entire output dictionary
    if json_output_file is not None:
        json_dict = OrderedDict()
        json_dict["name"] = json_name
        json_dict["description"] = json_description
        timestamp = datetime.today()
        json_dict["timestamp"] = str(timestamp)
        json_dict["task"] = json_task
        json_dict["author"] = json_author
        json_dict["results"] = all_scores
        json_dict["id"] = hashlib.md5(
            json.dumps(json_dict).encode("utf-8")).hexdigest()[:12]
        save_json(json_dict, json_output_file)

    return all_scores
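
A compact sketch of the map-then-aggregate pattern above, with a hypothetical run_eval stand-in for run_evaluation and toy string pairs instead of NIfTI files: Pool.map scores every pair, then the per-label score lists are reduced with np.nanmean.

from collections import OrderedDict
from multiprocessing import Pool

import numpy as np


def run_eval(pair):
    # Hypothetical stand-in for run_evaluation: score one (test, reference) pair per label.
    test, ref = pair
    return {1: {"Dice": 1.0 if test == ref else 0.0}}


if __name__ == "__main__":
    test_ref_pairs = [("a", "a"), ("a", "b"), ("c", "c")]
    with Pool(2) as p:
        all_res = p.map(run_eval, test_ref_pairs)

    # Collect per-label score lists, then reduce them with np.nanmean as above.
    mean_scores = OrderedDict()
    for case in all_res:
        for label, scores in case.items():
            for score, value in scores.items():
                mean_scores.setdefault(label, OrderedDict()).setdefault(score, []).append(value)
    for label in mean_scores:
        for score in mean_scores[label]:
            mean_scores[label][score] = float(np.nanmean(mean_scores[label][score]))
    print(mean_scores)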
Ejemplo n.º 60
0
	def test(self, model, data_dir, fnames, D_config, use_logits=False):
		"""
		model: LSTM/LSTM_CNN/BiLSTM model for testing; it should be loaded before being passed in
		data_dir: .npz file dir
		fnames: list of file names for training/testing
		D_config: data loader config
		"""
		self.D_config = D_config
		self.D_config["free_mem"] = True
		self.use_logits = use_logits

		# reset for testing
		self.all_y = []
		self.all_pred = []

		target_files = self.data_dir2target_files(data_dir, fnames)
		if target_files is None:
			return None

		file_idx = 0

		# set to True once every file has been submitted to the loader pool
		all_done = False

		pool = Pool(self.num_threads)
		print("[INFO] Notice that we are using multiprocessing to load files, so that child processes won't print out on ipython-notebook, which only monitor the parent process. Please check the terminal for more logging info.")
		self.cur_file_in_mem = 0
		while not all_done:
			one_file = target_files[file_idx]
			if self.cur_file_in_mem < self.num_file_in_mem and not all_done:
				# submit a loading task to the worker pool for this file
				pool.apply_async(self._newthread_helper, args=(one_file,), \
					callback=self._callback_helper, error_callback=self._error_helper)
				self.cur_file_in_mem += 1
				file_idx = file_idx + 1
				if file_idx == len(target_files):
					# every file has been submitted for loading
					all_done = True
			else:
				time.sleep(0.001)

			if len(self.DataPool) > 0:
				self._batch_test_helper(model)
			else:
				time.sleep(0.001)

		pool.close()
		# wait until all child processes are done, i.e. every loaded Heater_Data object has been put into self.DataPool
		pool.join()
		for _ in range(len(self.DataPool)):
			self._batch_test_helper(model)

		# calc overall accuracy and AUC
		self.all_y_onehot = np.concatenate(self.all_y)
		self.all_y = np.argmax(self.all_y_onehot, axis=1)
		self.all_pred = np.concatenate(self.all_pred)
		pred_y = np.argmax(self.all_pred, axis=1)
		m_auc = roc_auc_score(self.all_y_onehot, self.all_pred)
		print("overall acc: %.4f, overall AUC: %.4f" % (np.mean(pred_y==self.all_y), m_auc))
		# reset self.DataPool for future training/testing
		self.DataPool.clear()
		return m_auc
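
A stripped-down sketch of the throttled loader pattern used in test(), with hypothetical load_file/ThrottledLoader names: apply_async loads files in child processes, the callback (which runs in the parent) appends results to an in-memory pool capped at max_in_mem, and the main loop consumes from that pool while loading continues.

import time
from multiprocessing import Pool


def load_file(path):
    # Hypothetical stand-in for the .npz loader that runs in a child process.
    return "data:%s" % path


class ThrottledLoader(object):
    """Keep at most max_in_mem loaded files waiting while the main loop consumes them."""

    def __init__(self, num_threads=2, max_in_mem=2):
        self.num_threads = num_threads
        self.max_in_mem = max_in_mem
        self.data_pool = []           # filled by callbacks, which run in the parent process
        self.in_flight_or_loaded = 0  # files submitted but not yet consumed

    def _on_loaded(self, data):
        self.data_pool.append(data)

    def consume_all(self, paths, handle):
        pool = Pool(self.num_threads)
        idx = 0
        while idx < len(paths) or self.data_pool:
            if idx < len(paths) and self.in_flight_or_loaded < self.max_in_mem:
                # submit the next file without waiting for earlier ones
                pool.apply_async(load_file, args=(paths[idx],), callback=self._on_loaded)
                self.in_flight_or_loaded += 1
                idx += 1
            elif self.data_pool:
                handle(self.data_pool.pop())
                self.in_flight_or_loaded -= 1
            else:
                time.sleep(0.001)     # nothing ready yet; avoid a busy spin
        pool.close()
        pool.join()
        # drain anything that finished loading after the main loop exited
        while self.data_pool:
            handle(self.data_pool.pop())
            self.in_flight_or_loaded -= 1


if __name__ == "__main__":
    loader = ThrottledLoader()
    loader.consume_all(["f1.npz", "f2.npz", "f3.npz"], handle=print)

Because apply_async callbacks execute in the parent process, appending to a plain list is sufficient here; the original method relies on the same property when it fills self.DataPool.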