Example #1
 def _itergroundings(self, simplify=False, unsatfailure=False):
     global global_bpll_grounding
     global_bpll_grounding = self
     if self.multicore:
         pool = Pool(maxtasksperchild=1)
         try:
             for gndresult in pool.imap(with_tracing(create_formula_groundings), self.formulas):
                 for fidx, stat in gndresult:
                     for (varidx, validx, val) in stat: 
                         self._varidx2fidx[varidx].add(fidx)
                         self._addstat(fidx, varidx, validx, val)
                     checkmem()
                 yield None
         except CtrlCException as e:
             pool.terminate()
             raise e
         pool.close()
         pool.join()
     else:
         for gndresult in imap(create_formula_groundings, self.formulas):
             for fidx, stat in gndresult:
                 for (varidx, validx, val) in stat: 
                     self._varidx2fidx[varidx].add(fidx)
                     self._addstat(fidx, varidx, validx, val)
             yield None
Example #2
 def _itergroundings(self, simplify=True, unsatfailure=True):
     # generate all groundings
     if not self.formulas:
         return
     global global_fastConjGrounding
     global_fastConjGrounding = self
     batches = list(rndbatches(self.formulas, 20))
     batchsizes = [len(b) for b in batches]
     if self.verbose:
         bar = ProgressBar(width=100, steps=sum(batchsizes), color='green')
         i = 0
     if self.multicore:
         pool = Pool()
         try:
             for gfs in pool.imap(with_tracing(create_formula_groundings), batches):
                 if self.verbose:
                     bar.inc(batchsizes[i])
                     bar.label(str(cumsum(batchsizes, i + 1)))
                     i += 1
                 for gf in gfs: yield gf
         except Exception as e:
             logger.error('Error in child process. Terminating pool...')
             pool.close()
             raise e
         finally:
             pool.terminate()
             pool.join()
     else:
         for gfs in imap(create_formula_groundings, batches):
             if self.verbose:
                 bar.inc(batchsizes[i])
                 bar.label(str(cumsum(batchsizes, i + 1)))
                 i += 1
             for gf in gfs: yield gf
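
Examples #1 and #2 share the same shutdown pattern: results are consumed lazily from pool.imap, the pool is torn down with terminate() if iteration raises, and with close()/join() otherwise. A minimal, self-contained sketch of that pattern follows; square is a hypothetical stand-in for create_formula_groundings, and the real grounding logic is omitted.

from multiprocessing import Pool

def square(x):  # hypothetical stand-in for the real per-item worker
    return x * x

def run_all(items, multicore=True):
    if not multicore:
        return [square(x) for x in items]
    pool = Pool()
    try:
        results = list(pool.imap(square, items))
    except Exception:
        pool.terminate()   # abort the workers immediately on error
        raise
    else:
        pool.close()       # normal shutdown: no new tasks
        return results
    finally:
        pool.join()        # wait for the worker processes to exit

if __name__ == "__main__":
    print(run_all(range(5)))  # [0, 1, 4, 9, 16]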
Example #3
def run():
    setup_logger()
    logger.info('Started')
    queue = multiprocessing.Queue(maxsize=EVENT_QUEUE_MAX_SIZE)
    pool = Pool(processes=WORKERS,
            initializer=worker,
            initargs=(queue,))

    event_handler = EventHandler(queue)
    observer = init_observer()
    try:
        delete_all_files(FRAMES_PATH)
        observer.schedule(event_handler, path=FRAMES_PATH, recursive=True)
        signal.signal(signal.SIGINT, signal_handler)
        observer.start()

        while True:
            pool._maintain_pool() #restart workers if needed
            time.sleep(1)
            now = datetime.datetime.now()
            if now - event_handler.last_event > datetime.timedelta(minutes=1):
                logger.warning("No events received in the last minute.")
                # Sometimes watchdog stops receiving events.
                # We exit, so the process can be restarted.
                break
    except KeyboardInterrupt as err:
        logger.warning("Keyboard interruption")
    except Exception as err:
        logger.exception(err)
    finally:
        observer.stop()
    observer.join()
    pool.terminate()
    logger.warning("Bye")
Example #4
def work(host, port, processes, threads, times):
    pool = Pool(processes,
                lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
    p = Process(target=progress)
    p.daemon = True

    start = time.time()

    try:
        for chunk in divide(times, processes):
            pool.apply_async(thread, (host, port, threads, chunk))

        p.start()

        pool.close()
        pool.join()
        p.terminate()
        p.join()

    except KeyboardInterrupt:
        pool.terminate()
        p.terminate()
        p.join()
        pool.join()

    return time.time() - start
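
The pattern in Example #4 is worth isolating: the pool initializer makes workers ignore SIGINT, so Ctrl+C is delivered only to the parent, which then calls pool.terminate(). A stripped-down sketch under that assumption; slow_task is a placeholder for the example's thread helper.

import signal
import time
from multiprocessing import Pool

def ignore_sigint():
    # workers ignore Ctrl+C; only the parent handles it
    signal.signal(signal.SIGINT, signal.SIG_IGN)

def slow_task(n):  # hypothetical stand-in for the example's `thread` worker
    time.sleep(0.1)
    return n

if __name__ == "__main__":
    pool = Pool(4, initializer=ignore_sigint)
    try:
        results = [pool.apply_async(slow_task, (i,)) for i in range(8)]
        print([r.get() for r in results])
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        pool.terminate()   # on Ctrl+C, kill the workers instead of waiting
        pool.join()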
Example #5
    def run(self, test_name=None, db_adapter=None):

        if db_adapter is None:
            db_adapter = DEFAULT_DATABASE_ADAPTER
        if test_name is None:
            test_name = '_'.join([db_adapter, datetime.datetime.now().strftime("%Y-%m-%d %H:%M")])

        print ''.join(['Running "', test_name, '" test'])
        print 'Prepare database'

        adapter = adapter_factory(db_adapter)
        adapter.prepare_db()
        test_id = adapter.create_new_test(test_name)

        print ''
        print 'Create user documents'

        pool = Pool(processes=10)
        params = [{'user_id': i, 'docs_per_user': DOCS_PER_USER, 'db_adapter': db_adapter}
                  for i in range(1, USERS_COUNT + 1)]

        start = time.time()
        try:
            pool.map(create_users, params)
            print 'Full time:', time.time() - start
        finally:
            pool.terminate()
        del pool

        print 'OK! Users were created!'
        print ''

        for i in range(1, MAX_PROCESSES + 1):
            print 'Run test with %d proceses' % i
            pool = Pool(processes=i)
            params = [{'user_id': j, 'db_adapter': db_adapter} for j in range(1, USERS_COUNT + 1)]
            start = time.time()
                
            try:
                res = pool.map(update_users, params)
                full_time = time.time() - start
            finally:
                pool.terminate()
            del pool

            print 'Test is finished! Save results'
            print ''

            adapter.save_results(test_id, res, i)

            print 'Full time:', full_time
            print ''

        print 'Finish!'
Example #6
def create_initial_partial_vocabs(all_files, path_to_dump: str):
    partial_vocabs_queue = []
    files_total = len(all_files)
    current_file = 0
    chunk_generator = create_chunk_generator(len(all_files), N_CHUNKS)
    params = [(file, path_to_dump, chunk)
              for file, chunk in zip(all_files, chunk_generator)]
    pool = Pool()
    partial_vocab_it = pool.imap_unordered(create_and_dump_partial_vocab,
                                           params)
    for partial_vocab in partial_vocab_it:
        partial_vocabs_queue.append(partial_vocab)
        current_file += 1
        logger.info(
            f"To partial vocabs added  {current_file} out of {files_total}")
    pool.terminate()
    return partial_vocabs_queue
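
Note that Example #6 never calls close(): it drains imap_unordered (results arrive in completion order, not submission order) and then terminates the pool even on success. The context-manager form of Pool does the same teardown, since __exit__() calls terminate(). A small sketch with a dummy worker:

import time
from multiprocessing import Pool

def work(seconds):  # dummy worker: sleeps, then echoes its argument
    time.sleep(seconds)
    return seconds

if __name__ == "__main__":
    with Pool(2) as pool:  # leaving the with-block calls pool.terminate()
        # completion order, e.g. [0.1, 0.2, 0.3] rather than [0.3, 0.1, 0.2]
        print(list(pool.imap_unordered(work, [0.3, 0.1, 0.2])))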
Example #7
    def postprocd(self, func, nthreads=1, pool=None):
        """
        Post-process some values into this chain. 
                
        Args:
            func : a function which accepts all the keys in the chain
                and returns a dictionary of new keys to add. `func` must accept *all* 
                keys in the chain, if there are ones you don't need, capture them 
                with **_ in its call signature, e.g. to add in a parameter 'b'
                which is 'a' squared, use postprocd(lambda a,**_: {'b':a**2})
            nthreads : the number of threads to use
            pool : any worker pool which has a pool.map function. 
               default: multiprocessing.Pool(nthreads)
               
        Returns:
            A new chain with the new values post-processed in.
            Does not alter the original chain. If for some rows in the
            chain `func` did not return all the keys, these will be filled
            in with `nan`. 
            
        Note:
            This repeatedly calls `func` on rows in the chain, so it's very inefficient
            if you already have a vectorized version of your post-processing function. 
            `postprocd` is mostly useful for slow non-vectorized post-processing functions, 
            allowing convenient use of the `nthreads` option to this function. 
            
            For the default implementation of `pool`, `func` must be picklable, 
            meaning it must be a module-level function. 
        """

        if pool is not None: _pool = pool
        elif nthreads != 1: _pool = Pool(nthreads)
        else: _pool = None

        mp = map if _pool is None else _pool.map

        try:
            dat = mp(partial(_postprocd_helper, func), self.iterrows())
        finally:
            if pool is None and _pool is not None: _pool.terminate()

        c = self.copy()
        allkeys = set(chain(*[d.keys() for d in dat]))
        c.update({k: array([d.get(k, nan) for d in dat]) for k in allkeys})
        return c
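
As the docstring above notes, the default pool requires func to be picklable, i.e. defined at module level. A hypothetical usage sketch of that constraint with the same terminate-in-finally cleanup; add_b is illustrative and not part of the chain API shown above.

from functools import partial
from multiprocessing import Pool

def add_b(row, scale=1.0):  # module-level, hence picklable
    return {**row, "b": scale * row["a"] ** 2}

if __name__ == "__main__":
    rows = [{"a": 1.0}, {"a": 2.0}, {"a": 3.0}]
    pool = Pool(2)
    try:
        out = pool.map(partial(add_b, scale=2.0), rows)
    finally:
        pool.terminate()  # mirrors the cleanup in postprocd above
    print(out)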
Example #8
    def postprocd(self,func,nthreads=1,pool=None):
        """
        Post-process some values into this chain. 
                
        Args:
            func : a function which accepts all the keys in the chain
                and returns a dictionary of new keys to add. `func` must accept *all* 
                keys in the chain, if there are ones you don't need, capture them 
                with **_ in its call signature, e.g. to add in a parameter 'b'
                which is 'a' squared, use postprocd(lambda a,**_: {'b':a**2})
            nthreads : the number of threads to use
            pool : any worker pool which has a pool.map function. 
               default: multiprocessing.Pool(nthreads)
               
        Returns:
            A new chain with the new values post-processed in.
            Does not alter the original chain. If for some rows in the
            chain `func` did not return all the keys, these will be filled
            in with `nan`. 
            
        Note:
            This repeatedly calls `func` on rows in the chain, so it's very inefficient
            if you already have a vectorized version of your post-processing function. 
            `postprocd` is mostly useful for slow non-vectorized post-processing functions, 
            allowing convenient use of the `nthreads` option to this function. 
            
            For the default implementation of `pool`, `func` must be picklable, 
            meaning it must be a module-level function. 
        """

        if pool is not None: _pool = pool
        elif nthreads!=1: _pool = Pool(nthreads)
        else: _pool = None

        mp=map if _pool is None else _pool.map

        try:
            dat = mp(partial(_postprocd_helper,func),self.iterrows())
        finally:
            if pool is None and _pool is not None: _pool.terminate()

        c=self.copy()
        allkeys = set(chain(*[d.keys() for d in dat]))
        c.update({k:array([d.get(k,nan) for d in dat]) for k in allkeys})
        return c
Example #9
def run(config_uri, app_name=None, username=None, types=(), batch_size=500, processes=None):
    # multiprocessing.get_context is Python 3 only.
    from multiprocessing import get_context
    from multiprocessing.pool import Pool

    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('snovault').setLevel(logging.DEBUG)

    testapp = internal_app(config_uri, app_name, username)
    connection = testapp.app.registry[CONNECTION]
    uuids = [str(uuid) for uuid in connection.__iter__(*types)]
    transaction.abort()
    logger.info('Total items: %d' % len(uuids))

    pool = Pool(
        processes=processes,
        initializer=initializer,
        initargs=(config_uri, app_name, username),
        context=get_context('forkserver'),
    )

    all_results = []
    try:
        for result in pool.imap_unordered(worker, batched(uuids, batch_size), chunksize=1):
            results = result['results']
            errors = sum(error for item_type, path, update, error in results)
            updated = sum(update for item_type, path, update, error in results)
            logger.info('Batch: Updated %d of %d (errors %d)' %
                        (updated, len(results), errors))
            all_results.extend(results)
    finally:
        pool.terminate()
        pool.join()

    def result_item_type(result):
        # Ensure we always return a string
        return result[0] or ''

    for item_type, results in itertools.groupby(
            sorted(all_results, key=result_item_type), key=result_item_type):
        results = list(results)
        errors = sum(error for item_type, path, update, error in results)
        updated = sum(update for item_type, path, update, error in results)
        logger.info('Collection %s: Updated %d of %d (errors %d)' %
                    (item_type, updated, len(results), errors))
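
Example #9 (and Example #16 below) hands imap_unordered pre-built uuid batches via a batched helper from its own codebase. A minimal batching generator with the same shape might look like the sketch below; this is an assumption for illustration, not the library's implementation.

def batched(items, batch_size):
    """Yield successive lists of at most batch_size items."""
    batch = []
    for item in items:
        batch.append(item)
        if len(batch) >= batch_size:
            yield batch
            batch = []
    if batch:
        yield batch

# e.g. list(batched(range(7), 3)) -> [[0, 1, 2], [3, 4, 5], [6]]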
Example #10
def multiprocess_all_chromosomes(func, cls, *args, **kwargs):
    '''
    Convenience method for splitting up queries based on tag id.
    '''
    processes = current_settings.ALLOWED_PROCESSES

    set_chromosome_lists(cls, use_table=kwargs.get('use_table', None))
    p = Pool(processes)

    try:
        for chr_list in current_settings.CHR_LISTS:
            p.apply_async(func, args=[cls, chr_list, ] + list(args))
        p.close()
        p.join()
    except Exception as e:
        print('Terminating pool.')
        p.terminate()
        raise e
Example #11
def stress_the_melon(processes=10, times=500, url=URL):
    """
    :param processes: amount of processes running at the same time
    :param times: how many requests you want to make (sum)
    :param url: the url you want to get
    """
    print('Stress test working with ' + str(processes) + ' threads and ' +
          str(times) + ' requests\nURL: ' + URL)
    pool = Pool(processes=int(processes))

    for i in range(0, int(times)):
        result = pool.apply_async(do_get_request, [
            url,
        ])
        if result.get() != 200:
            print('OK, the Melon just died :(')

    pool.terminate()
Example #12
    def apply_forces(self, inds):
        """
        Uses multiprocessing to apply loads to the unit-stress tensors
        in the individuals provided. 

        inputs: 
        inds: tensor_ind objects representing a series of designs. 

        outputs: 
        app: A nxm array of stress tensors, where n is the number of individuals in inds, 
             and m is the number of elements that data was requested from in each individual's 
             apply_force method. 
        """
        pool = Pool(8)
        args_to_pool = [[x, self.sto_force_x, self.sto_force_y] for x in inds]
        app = pool.starmap(self.call_apply, args_to_pool)
        pool.terminate()
        return app
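
Pool.starmap, used above, unpacks each argument list into the worker call. A self-contained sketch of the same call shape, with a module-level function standing in for the bound method self.call_apply (bound methods only pickle if the instance itself does):

from multiprocessing import Pool

def scale(x, fx, fy):  # hypothetical stand-in for call_apply(ind, force_x, force_y)
    return x * fx, x * fy

if __name__ == "__main__":
    args_to_pool = [(x, 2.0, 3.0) for x in range(4)]
    pool = Pool(8)
    app = pool.starmap(scale, args_to_pool)
    pool.terminate()  # same teardown as apply_forces above
    print(app)  # [(0.0, 0.0), (2.0, 3.0), (4.0, 6.0), (6.0, 9.0)]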
Example #13
def crawl_companies_files(phantomjs_path,
                          workers_num=10,
                          include_companies=None,
                          from_date=None):

    companies_files = []
    pool = Pool(processes=workers_num)

    try:
        # Obtain the ccvm codes of all the listed companies
        ccvm_codes = [
            r.ccvm for r in BovespaCompany.objects.only(["ccvm"]).all()
        ]

        ccvm_codes = sorted(ccvm_codes)

        _logger.debug("Processing the files of {0} companies from {1}".format(
            len(ccvm_codes), "{0:%Y-%m-%d}".format(from_date)
            if from_date else "THE BEGINNING"))

        func_params = []
        for ccvm_code in ccvm_codes:
            if include_companies and ccvm_code not in include_companies:
                continue

            for doc_type in DOC_TYPES:
                func_params.append(
                    [ccvm_code, phantomjs_path, doc_type, from_date])

        # call_results = pool.starmap(obtain_company_files, func_params)
        pool.starmap(obtain_company_files, func_params)

        # Merge all the responses into one only list
        # companies_files += list(
        #    itertools.chain.from_iterable(call_results))

    except TimeoutError:
        print("Timeout error")
        traceback.print_exc()
        raise
    finally:
        pool.close()
        pool.join()
        pool.terminate()
Example #14
def show_logbook():
    """
    Show information about any jobs currently running
    Uses many processes to poll the job board because the latency
    can be high but the processing power required is low
    :return None:
    """
    print("Not connected to jobboard")
    pool = Pool(processes=SHOW_POLLERS)
    try:
        for _ in range(SHOW_POLLERS):
            pool.apply(query_and_print)
            sleep(0.1)
    except KeyboardInterrupt:
        pool.terminate()
    except Exception:
        pool.terminate()
    finally:
        pool.close()
        pool.join()
Example #15
  def join(self):
    try:
      while True:
        total_tasks = len(self.results)
        done_tasks = 0
        for result in self.results:
          if result.ready():
            done_tasks += 1

        if done_tasks == total_tasks:
          self.progress('[%d task(s) completed, %d process(es)]',
            done_tasks, self._processes)
          break
        else:
          self.progress('[%d task(s) completed, %d remaining, %d process(es)]',
            done_tasks, total_tasks - done_tasks, self._processes)
          time.sleep(0.001)

    except KeyboardInterrupt:
      NativePool.terminate(self)
      return NativePool.join(self)
Example #16
def _run_pool(uuids, args):
    from multiprocessing import get_context
    from multiprocessing.pool import Pool
    transaction.abort()
    pool = Pool(
        processes=args.processes,
        initializer=_pool_initializer,
        initargs=(args.config_uri, args.app_name, args.username),
        context=get_context('forkserver'),
        maxtasksperchild=args.maxtasksperchild,
    )
    est_loops = int(len(uuids) / args.batchsize)
    all_results = []
    try:
        pool_gen = pool.imap_unordered(
            _pool_worker,
            _pool_batch_results(uuids, args.batchsize),
            chunksize=args.chunksize,
        )
        for loop, result in enumerate(pool_gen, 1):
            results = result['results']
            error_msgs = [
                error_msg for _, _, _, _, error_msg in results if error_msg
            ]
            updated_cnt = sum(update for _, _, update, _, _ in results)
            log_msg = "{} of ~{} Batch: Updated {} of {} (errors {})".format(
                loop,
                est_loops,
                updated_cnt,
                len(results),
                len(error_msgs),
            )
            BATCH_UPGRADE_LOG.info(log_msg)
            for error_msg in error_msgs:
                BATCH_UPGRADE_LOG.error("\t%s", error_msg)
            all_results.extend(results)
    finally:
        pool.terminate()
        pool.join()
    return all_results
Example #17
    def join(self):
        try:
            while True:
                total_tasks = len(self.results)
                done_tasks = 0
                for result in self.results:
                    if result.ready():
                        done_tasks += 1

                if done_tasks == total_tasks:
                    self.progress('[%d task(s) completed, %d process(es)]',
                                  done_tasks, self._processes)
                    break
                else:
                    self.progress(
                        '[%d task(s) completed, %d remaining, %d process(es)]',
                        done_tasks, total_tasks - done_tasks, self._processes)
                    time.sleep(0.001)

        except KeyboardInterrupt:
            NativePool.terminate(self)
            return NativePool.join(self)
Example #18
    def image_urls(self):
        """ Iterates over json obj, gets image links
            Creates pool of workers, creates new workers """
        json_obj = self.jsonify()

        for post in json_obj["posts"]:
            if "ext" in post:
                self.total_count.value += 1

        try:
            self.thread_name = self.args.name
        except (KeyError, NameError):
            self.thread_name = json_obj["posts"][0]["sub"].replace(" ", "_")
        else:
            self.thread_name = str(json_obj["posts"][0]["no"])

        for post in json_obj["posts"]:
            if "ext" in post:
                filename = post["tim"] + post["ext"]
                image_url = "https://8ch.net/{board}/src/{file}".format(board=self.board, file=filename)
                self.downloads.append((image_url, filename))
                self.download_image(image_url, filename)

                with self.counter.get_lock():
                    self.counter.value += 1
                    update_progress(self.counter.value, self.total_count.value)

        pool = Pool(self.workers)
        pool_map = pool.map_async(self.download_image, self.downloads)

        try:
            pool_map.get(0xFFFF)
        except KeyboardInterrupt:
            print("Aborting")
            pool.terminate()
            pool.join()
        else:
            pool.close()
            pool.join()
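
Example #18 waits on the AsyncResult with pool_map.get(0xFFFF) instead of joining directly, so a KeyboardInterrupt in the parent can be caught and answered with pool.terminate(). A trimmed sketch of that idiom; fake_download is a placeholder for download_image.

import time
from multiprocessing import Pool

def fake_download(url):  # placeholder for the real download_image
    time.sleep(0.1)
    return url

if __name__ == "__main__":
    urls = ["u%d" % i for i in range(8)]
    pool = Pool(4)
    result = pool.map_async(fake_download, urls)
    try:
        print(result.get(0xFFFF))  # wait with a long timeout, as in the example
    except KeyboardInterrupt:
        print("Aborting")
        pool.terminate()
    else:
        pool.close()
    finally:
        pool.join()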
Example #19
def run_check(check_list, pid):
    queues = queue.Queue()

    pool = Pool(10)  # create a pool with 10 workers

    for fn in check_list:

        if mongo.db.tasks.find_one({"id": pid}) == None:
            pool.terminate()
            pool.close()
            return False

        print(fn)

        pool.apply_async(waf_check, (fn, queues))

    pool.close()  # close the pool; no new tasks are accepted
    pool.join()  # the main process blocks until the workers exit

    checkd_list = list(queues.queue)

    return checkd_list
Example #20
def poolHandle(zip,nid):
	if DEBUG_LEVEL ==0 : 	
		p = Pool(80)
		for sub in zip.namelist():
			fobj = getSubFobj(zip,sub)
			if fobj != None : p.apply_async(handleSub,args=(fobj,nid))
		p.close()  
		p.join()
	elif DEBUG_LEVEL ==1 :
		p = billiard.Pool()
		_finalizers.append(Finalize(p, p.terminate))
		try:
			p.map_async(handleSub, [(getSubFobj(zip,sub),nid) for sub in zip.namelist()])
			p.close()
			p.join()
		finally:
			p.terminate()
	else :
		for sub in zip.namelist():
			fobj = getSubFobj(zip,sub)
			if fobj != None : handleSub(fobj,nid)
	zip.close()
Example #21
def download_files(cache_folder,
                   files_per_ccvm_and_doc_type,
                   doc_types,
                   workers_num=10,
                   force_download=False,
                   include_companies=None):

    pool = Pool(processes=workers_num)
    try:
        func_params = []
        for key, files in files_per_ccvm_and_doc_type.items():

            ccvm, doc_type = key.split("_")

            # We process only the informed companies, if there is any informed
            if include_companies and ccvm not in include_companies:
                continue

            for (fiscal_date, protocol, version,
                 doc_type, delivery_type, delivery_date) in files:

                filename = "CCVM_{0}_{1:%Y%m%d}_{2}.{3}".format(
                    ccvm, fiscal_date, version.replace(".",""), doc_type)

                func_params.append([
                    cache_folder, ccvm, fiscal_date, version,
                    doc_type, protocol, force_download])

            _logger.debug("Downloading {} files...".format(len(func_params)))
            call_results = pool.starmap(download_file, func_params)

            generate_dataset(call_results)
    except TimeoutError:
        _logger.exception("Timeout error")
        raise
    finally:
        pool.close()
        pool.join()
        pool.terminate()
Example #22
    def handle(self, *args, **options):
        global crawler_clazz, crawler

        workers_num = options.get("workers_num", 1)

        results = []
        pool = Pool(processes=workers_num)

        try:
            crawler_params = crawler.crawl_params(**options)

            _logger.info("Starting crawling with {0} params.".format(
                len(crawler_params)))

            func_params = []
            for crawler_param in crawler_params:
                func_params.append([crawler_param, options])

            _logger.info("Starting a Pool of %d processes" % workers_num)

            # crawl(*func_params[0])
            # call_results = pool.starmap(crawl, func_params)
            pool.starmap(crawl, func_params)

            # Merge all the responses into one only list
            # if call_results:
            #    results += list(
            #        itertools.chain.from_iterable(call_results))
            #    _logger.info("Crawler results ({0}): {1}".
            #             format(len(results), results))

            _logger.info("Crawler successfully finished!")
        except TimeoutError:
            _logger.error("Timeout error")
        finally:
            pool.close()
            pool.join()
            pool.terminate()
Example #23
def main():
    global sqs_conn, sqs_queue
    args = parse_args()

    start_time = datetime.datetime.utcnow()
    first_start_time = start_time

    print "first start: %s" % first_start_time

    with open(args.get('config'), 'r') as f:
        config = json.load(f)

    sqs_config = config.get('sqs')

    sqs_conn = boto.sqs.connect_to_region(**sqs_config)
    queue_name = 'baas20sr_usea_baas20sr_usea_index_all_dead'
    sqs_queue = sqs_conn.get_queue(queue_name)

    last_size = sqs_queue.count()

    print 'Last Size: ' + str(last_size)

    pool = Pool(10)

    keep_going = True

    while keep_going:
        sqs_messages = sqs_queue.get_messages(
            num_messages=10,
            visibility_timeout=10,
            wait_time_seconds=10)

        if len(sqs_messages) > 0:
            pool.map(check_exists, sqs_messages)
        else:
            print 'DONE!'
            pool.terminate()
            keep_going = False
Example #24
 def _itergroundings(self, simplify=True, unsatfailure=True):
     # generate all groundings
     if not self.formulas:
         return
     global global_fastConjGrounding
     global_fastConjGrounding = self
     batches = list(rndbatches(self.formulas, 20))
     batchsizes = [len(b) for b in batches]
     if self.verbose:
         bar = ProgressBar(steps=sum(batchsizes), color='green')
         i = 0
     if self.multicore:
         pool = Pool()
         try:
             for gfs in pool.imap(with_tracing(create_formula_groundings),
                                  batches):
                 if self.verbose:
                     bar.inc(batchsizes[i])
                     bar.label(str(cumsum(batchsizes, i + 1)))
                     i += 1
                 for gf in gfs:
                     yield gf
         except Exception as e:
             logger.error('Error in child process. Terminating pool...')
             pool.close()
             raise e
         finally:
             pool.terminate()
             pool.join()
     else:
         for gfs in map(create_formula_groundings, batches):
             if self.verbose:
                 bar.inc(batchsizes[i])
                 bar.label(str(cumsum(batchsizes, i + 1)))
                 i += 1
             for gf in gfs:
                 yield gf
Example #25
    def image_urls(self):
        """ Iterates over json obj, gets image links
            Creates pool of workers, creates new workers """
        json_obj = self.jsonify()

        for post in json_obj['posts']:
            if 'ext' in post:
                self.total_count.value += 1

        self.thread_name = json_obj['posts'][0]['semantic_url']

        for post in json_obj['posts']:
            if 'ext' in post:
                filename = str(post['tim']) + post['ext']
                image_url = 'https://i.4cdn.org/{board}/{file}'.format(
                    board=self.board, file=filename)
                self.filename.append(filename)
                self.downloads.append(image_url)
                self.download_image(image_url, filename)

                with self.counter.get_lock():
                    self.counter.value += 1
                    update_progress(self.counter.value, self.total_count.value)

        manager = Manager()
        pool_data = manager.list(self.downloads)
        partial_data = partial(self.download_image, pool_data)
        pool = Pool(self.workers)
        pool_map = pool.map_async(partial_data, self.filename)

        try:
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            print("Aborting")
            pool.terminate()
            pool.join()
Example #26
def main(args):
    thread_count = args.n or 1
    p = Pool(thread_count)
    start = time.time()

    if args.f == 'ping':
        args_ips = args.ip.split('-')
        if args_ips[1]:
            ips = findips(args_ips[0], args_ips[1])
        else:
            ips = args_ips
        for ip in ips:
            p.apply_async(ping, args=(ip, ))  # submit asynchronously to the process pool

    elif args.f == 'tcp':
        ip = args.ip
        port_dict = {'ip': ip, 'port': {}}
        for port in range(1, 10001):
            result = p.apply_async(tcp, args=(ip, port))  # submit asynchronously to the process pool
            presult = result.get()
            if presult:
                port_dict['port'][port] = 'Open'
        print(port_dict)

        json_str = json.dumps(port_dict)
        path = os.path.abspath(os.path.dirname(__file__))
        filename = args.w or f'{path}/result.json'
        with open(filename, 'w') as f:
            f.write(json_str)

    p.close()
    p.join()
    p.terminate()

    end = time.time()
    if args.v:
        print('Time : ', end - start)
Example #27
def competition():
    p = Pool(processes = 4)

    total = 0
    scores = {}

    mycmd = 'examples.Greedy'
    competitors = ['examples.Greedy']
    levels = ['map00']

    pairs = itertools.product([mycmd], competitors)
    games = list(itertools.product(levels, pairs))
    
    print "Running against %i commanders on %i levels, for a total of %i games.\n" % (len(competitors), len(levels), len(games))
    try:
        for level, results in p.map(run, games):
            for (_, bot), score in results.items():
                scores.setdefault(bot, [0, 0, 0, 0, 0])
                scores[bot][0] += score[0]                      # Flags captured.
                scores[bot][1] += score[1]                      # Flags conceded.
                scores[bot][2] += int(score[0] > score[1])      # Win.
                scores[bot][3] += int(score[0] == score[1])     # Draw.
                scores[bot][4] += int(score[1] > score[0])      # Loss.
            total += 1
    except KeyboardInterrupt:
        print "\nTerminating competition due to keyboard interrupt."
        p.terminate()
        p.join()
    else:        
        print "\n"
        for r, s in sorted(scores.items(), key = lambda i: i[1][2]*30 + i[1][3]*10 + i[1][0] - i[1][1], reverse = True):
            nick = r.replace('Commander', '')
            if nick in mycmd: continue

            print "{}\n\tCaptured {} flags and conceded {}.\n\tWon {}, drew {} and lost {}.\n".format(nick.upper(), *s)

        print '\n\nAll matches played against {}; best opponent at top of list.\n'.format(mycmd)
Example #28
def _run_with_multiprocessing(
    process, total_tiles, zoom_levels, multi, quiet, debug
):
    LOGGER.debug("run with multiprocessing")
    num_processed = 0
    LOGGER.info("run process using %s workers", multi)
    f = partial(_process_worker, process)
    with tqdm.tqdm(
        total=total_tiles, unit="tiles", disable=(quiet or debug)
    ) as pbar:
        for zoom in zoom_levels:
            process_tiles = process.get_process_tiles(zoom)
            pool = Pool(multi)
            try:
                for tile, output in pool.imap_unordered(
                    f, process_tiles,
                    # set chunksize to between 1 and MAX_CHUNKSIZE
                    chunksize=min([
                        max([total_tiles // multi, 1]), MAX_CHUNKSIZE
                    ])
                ):
                    pbar.update()
                    num_processed += 1
            except KeyboardInterrupt:
                LOGGER.info(
                    "Caught KeyboardInterrupt, terminating workers")
                pool.terminate()
                break
            except Exception:
                pool.terminate()
                raise
            finally:
                pool.close()
                pool.join()
                process_tiles = None
    LOGGER.info("%s tile(s) iterated", (str(num_processed)))
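
The chunksize expression in Example #28 clamps the per-worker batch to between 1 and MAX_CHUNKSIZE while aiming for roughly total_tiles / multi tiles per chunk. Written as a tiny helper (a sketch; max_chunksize is whatever the caller picks):

def clamp_chunksize(total_tiles, workers, max_chunksize=16):
    """Chunk size between 1 and max_chunksize, about total_tiles / workers."""
    return min(max(total_tiles // workers, 1), max_chunksize)

# clamp_chunksize(1000, 8) -> 16; clamp_chunksize(5, 8) -> 1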
Example #29
class SinglePool(Concurrency):
    name = 'Concurrency: SinglePool'

    def __init__(self, **kwargs):
        self.pool = None
        super().__init__(**kwargs)

    def __initialize(self, solver, **kwargs):
        if self.pool is not None:
            kwargs['output'].debug(2, 2, 'Pool already inited')
        else:
            self.pool = Pool(
                processes=self.processes,
                initializer=initializer,
                initargs=(kwargs['instance'], solver)
            )
            kwargs['output'].debug(2, 2, 'Init pool with %d processes' % self.processes)

    def __solve(self, tasks, **kwargs):
        output = kwargs['output']
        res_list, results = [], []
        for task in tasks:
            res = self.pool.apply_async(solve, (task,))
            res_list.append(res)

        while len(res_list) > 0:
            res_list[0].wait()

            i = 0
            while i < len(res_list):
                if res_list[i].ready():
                    res = res_list.pop(i)
                    try:
                        results.append(res.get())
                    except Exception as e:
                        output.debug(0, 1, 'Pool solving was completed unsuccessfully: %s', e)
                else:
                    i += 1

            output.debug(2, 3, 'Already solved %d tasks' % len(results))

        if not self.keep:
            self.terminate()

        return [result.set_value(self.measure.get(result)) for result in results]

    def single(self, task: Task, **kwargs) -> Result:
        cnf = kwargs['instance'].cnf().to_str(task.get())
        report = self.propagator.solve(cnf)

        result = task.resolve(report.status, report.time, {}, report.solution)
        return result.set_value(self.measure.get(result))

    def propagate(self, tasks: List[Task], **kwargs) -> List[Result]:
        self.__initialize(self.propagator, **kwargs)
        return self.__solve(tasks, **kwargs)

    def solve(self, tasks: List[Task], **kwargs) -> List[Result]:
        self.__initialize(self.solver, **kwargs)
        return self.__solve(tasks, **kwargs)

    def terminate(self):
        self.pool.terminate()
        self.pool = None
Example #30
    def __init__(self, game_board, depth, root):

        self.parent = []
        self.expectimax_val = []
        self.move = []

        # If root node
        if root:
            
            # Use parallel execution
            self.game_board = game_board
            usePool = 0
            if usePool:

                # Calculate the tree using a multiple process
                pool = Pool(processes=4)
                validMove = checkValidMoves(game_board)
                try:
                    # Evaluaate the branches of the tree
                    if validMove[0]:
                        # self.right = MoveNode(right_move_return(game_board), depth)
                        right_game_board = right_move_return(game_board)
                        right_depth = depth
                        right_result = pool.apply_async(MoveNode, (right_game_board, right_depth))
                    else:
                        self.left = []
                    if validMove[1]:
                        # self.left = MoveNode(left_move_return(game_board), depth)
                        left_game_board = left_move_return(game_board)
                        left_depth = depth
                        left_result = pool.apply_async(MoveNode, (left_game_board, left_depth))
                    else:
                        self.right = []
                    if validMove[2]:
                        # self.up = MoveNode(up_move_return(game_board), depth)
                        up_game_board = up_move_return(game_board)
                        up_depth = depth
                        up_result = pool.apply_async(MoveNode, (up_game_board, up_depth))
                    else:
                        self.up = []
                    if validMove[3]:
                        # self.down = MoveNode(down_move_return(game_board), depth)
                        down_game_board = down_move_return(game_board)
                        down_depth = depth
                        down_result = pool.apply_async(MoveNode, (down_game_board, down_depth))
                    else:
                        self.down = []
                    
                    # Get the results from parallel pool
                    if validMove[0]:
                        self.right = right_result.get()
                    if validMove[1]:
                        self.left = left_result.get()
                    if validMove[2]:
                        self.up = up_result.get()
                    if validMove[3]:
                        self.down = down_result.get()
                
                # Close the pool on exception
                except:
                    pool.close()
                    pool.terminate()
                    pool.join()
                # Close the pool
                pool.close()

            # Not using the parallel pool
            else:
                validMove = checkValidMoves(game_board)
                if validMove[0]:
                    self.right = MoveNode(right_move_return(game_board), depth)
                else:
                    self.left = []
                if validMove[1]:
                    self.left = MoveNode(left_move_return(game_board), depth)
                else:
                    self.right = []
                if validMove[2]:
                    self.up = MoveNode(up_move_return(game_board), depth)
                else:
                    self.up = []
                if validMove[3]:
                    self.down = MoveNode(down_move_return(game_board), depth)
                else:
                    self.down = []

        # Build branches not from the root node
        else:
            # self.left = MoveNode(left_move_return(game_board), depth)
            # self.right = MoveNode(right_move_return(game_board), depth)
            # self.up = MoveNode(up_move_return(game_board), depth)
            # self.down = MoveNode(down_move_return(game_board), depth)
            validMove = checkValidMoves(game_board)

            if validMove[0]:
                self.right = MoveNode(right_move_return(game_board), depth)
            else:
                self.left = []
            if validMove[1]:
                self.left = MoveNode(left_move_return(game_board), depth)
            else:
                self.right = []
            if validMove[2]:
                self.up = MoveNode(up_move_return(game_board), depth)
            else:
                self.up = []
            if validMove[3]:
                self.down = MoveNode(down_move_return(game_board), depth)
            else:
                self.down = []
Example #31
    levels = ['map00', 'map01', 'map10', 'map11', 'map20', 'map30']

    pairs = itertools.product([mycmd], competitors)
    games = list(itertools.product(levels, pairs))

    print "Running against %i commanders on %i levels, for a total of %i games.\n" % (len(competitors), len(levels), len(games))
    try:
        for level, results in p.map(run, games):
            for (_, bot), score in results.items():
                scores.setdefault(bot, [0, 0, 0, 0, 0])
                scores[bot][0] += score[0]                      # Flags captured.
                scores[bot][1] += score[1]                      # Flags conceded.
                scores[bot][2] += int(score[0] > score[1])      # Win.
                scores[bot][3] += int(score[0] == score[1])     # Draw.
                scores[bot][4] += int(score[1] > score[0])      # Loss.
            total += 1
    except KeyboardInterrupt:
        print "\nTerminating competition due to keyboard interrupt."
        p.terminate()
        p.join()
    else:        
        print "\n"
        for r, s in sorted(scores.items(), key = lambda i: i[1][2]*30 + i[1][3]*10 + i[1][0] - i[1][1], reverse = True):
            nick = r.replace('Commander', '')
            if nick in mycmd: continue

            print "{}\n\tCaptured {} flags and conceded {}.\n\tWon {}, drew {} and lost {}.\n".format(nick.upper(), *s)

        print '\n\nAll matches played against {}; best opponent at top of list.\n'.format(mycmd)

Example #32
def train_loop(manager: Manager, log_queue: Queue):
    pool = Pool(processes=hp.NUM_POOL_WORKERS,
                initializer=pool_worker_init,
                initargs=(log_queue, ),
                maxtasksperchild=hp.MAX_GAMES_PER_POOL_WORKER)

    history_queue_file = "%s/history-queue.h5" % hp.OUTPUT_DIR
    if os.path.exists(history_queue_file):
        logging.info("Loading history queue from file: %s", history_queue_file)
        history_queue = manager.list(
            np.rec.array(
                util.read_from_hdf5_file(history_queue_file,
                                         "history_queue")[:]))
    else:
        history_queue = manager.list()

    self_play_model_file = hp.SELF_PLAY_MODEL_FILE
    trained_model_file = hp.TRAIN_MODEL_FILE

    num_clients_per_predict_worker = hp.NUM_POOL_WORKERS // hp.NUM_PREDICT_WORKERS + 1
    self_play_predict_workers, self_play_model_wstate = spawn_predict_proxies(
        hp.NUM_PREDICT_WORKERS, num_clients_per_predict_worker,
        self_play_model_file, manager, history_queue, log_queue)

    train_workers, train_model_wstate = spawn_predict_proxies(
        1, hp.NUM_POOL_WORKERS, self_play_model_file, manager, history_queue,
        log_queue)
    assert len(train_workers) == 1
    train_worker = train_workers[0]

    try:
        for iter_index in range(hp.START_ITER, hp.START_ITER + hp.NUM_ITER):
            # self-play
            logging.info("Iter %d: Starting self-play", iter_index)
            self_play_results = pool.starmap(
                self_play_worker,
                zip(repeat(iter_index), range(hp.NUM_GAMES),
                    repeat(history_queue), repeat(self_play_model_wstate)))
            logging.info("Iter %d, self-play results: %s", iter_index,
                         self_play_results)

            # train NN
            logging.info("Iter %d: Starting network train", iter_index)
            train_worker.control_pipe.send((ControlActions.TRAIN, iter_index))
            act, result = train_worker.control_pipe.recv()
            assert act == ControlActions.TRAIN_COMPLETED
            logging.info("Iter %d: Ended network train", iter_index)

            # eval
            logging.info("Iter %d: Starting evaluation", iter_index)
            eval_results = pool.starmap(
                multi_player_worker,
                zip(repeat(iter_index), range(hp.NUM_EVAL_GAMES),
                    repeat(train_model_wstate),
                    repeat(self_play_model_wstate)))
            logging.info("Iter %d: Evaluation end: results: %s", iter_index,
                         eval_results)
            outcomes = np.array([outcome for _, outcome in eval_results])
            trained_model_win_ratio = np.sum(
                outcomes == hp.OUTCOME_WIN_PLAYER_1) / len(outcomes)

            logging.info(
                "Iter %d evaluation: trained_model_win percent : %.2f%%",
                iter_index, trained_model_win_ratio * 100)

            if trained_model_win_ratio > hp.MIN_MODEL_REPLACEMENT_WIN_RATIO:
                stop_and_join_workers(*self_play_predict_workers)
                del self_play_model_wstate
                del self_play_predict_workers

                self_play_model_file = "%s/model-best-%00d-%.0f.h5" % (
                    hp.OUTPUT_DIR, iter_index, trained_model_win_ratio * 100)
                save_worker_model(train_worker, self_play_model_file)

                self_play_predict_workers, self_play_model_wstate = spawn_predict_proxies(
                    hp.NUM_PREDICT_WORKERS, num_clients_per_predict_worker,
                    self_play_model_file, manager, history_queue, log_queue)

            trained_model_file = "%s/model-train-%00d-%.0f.h5" % (
                hp.OUTPUT_DIR, iter_index, trained_model_win_ratio * 100)
            save_worker_model(train_worker, trained_model_file)

            util.save_to_hdf5_file({"history_queue": list(history_queue)},
                                   history_queue_file,
                                   compression='gzip')

            gc.collect()
        # end iter loop
    except Exception as e:
        if isinstance(e, (KeyboardInterrupt, SystemExit)):
            logging.info("Terminated by user.")
        else:
            logging.error("Error: %s", e)
        pool.terminate()
        pool.join()
        terminate_workers(train_worker, *self_play_predict_workers)
        raise e
    else:
        stop_and_join_workers(train_worker, *self_play_predict_workers)
        logging.info('Done successfully.')
    finally:
        pool.close()
Example #33
class _LocalRunner(_Runner):
    def __init__(self, max_tries = 3):
        super(_LocalRunner,self).__init__(max_tries)
        self.pool = None

    @classmethod
    def is_local(cls):
        return True
    
    @classmethod
    def max_tasks(cls):
        return cpu_count()
    
    def run(self,job):
        if isinstance(job, SplittableJob):
            job._split_to_tasks()
            jobs = job.tasks

        elif isinstance(job, collections.Callable):
            jobs = [job]

        else:
            jobs = job
            

#        for i,j in enumerate(jobs):
#            jw = JobWrapper(j)
#            jw()
            
        try:
            self.pool = Pool()
            results = [self.pool.apply_async(JobWrapper(j)) for j in jobs]

            for try_n in xrange(self.max_tries):

                for i, result in enumerate(results):
                    if result is not None:
                        result.wait()
                        if result.successful() and jobs[i].test_success():
                            results[i] = None
                        else:
                            try:
                                jobs[i].prepare_retry()
                            except TypeError:
                                pass

                            results[i] = self.pool.apply_async(JobWrapper(jobs[i]))

                    #Stop loop if all results were OK
                    if all(r is None for r in results):
                        break

            self.pool.close()
            self.pool.join()
            self.pool = None

            if not all(r is None for r in results):
                raise JobFailedException

        except (KeyboardInterrupt, SystemExit):
            if self.pool is not None:
                self.pool.terminate()
                self.pool.join()
                self.pool = None
            raise

        if isinstance(job, SplittableJob):
            job._merge_tasks()
Example #34
class LLTInf(object):
    """Obtains a decision tree that classifies the given labeled traces.

    traces : a Traces object
             The set of labeled traces to use as training set
    depth : integer
            Maximum depth to be reached
    optimize_impurity : function. Optional, defaults to optimize_inf_gain
                        A function that obtains the best parameters for a test
                        in a given node according to some impurity measure. It
                        should have the following prototype:
                            optimize_impurity(traces, primitive, rho, disp) :
                                (primitive, impurity)
                        where traces is a Traces object, primitive is a depth 2
                        STL formula, rho is a list with the robustness degree of
                        each trace up until this node in the tree and disp is a
                        boolean that switches output display. The impurity
                        returned should be so that the best impurity is the
                        minimum one.
    stop_condition : list of functions. Optional, defaults to [perfect_stop]
                     list of stopping conditions. Each stopping condition is a
                     function from a dictionary to boolean. The dictionary
                     contains all the information passed recursively during the
                     construction of the decision tree (see arguments of
                     lltinf_).
    disp : a boolean
           Switches display of debugging output

    Returns a DTree object.

    TODO: Fix comments

    """
    def __init__(
        self,
        depth=1,
        primitive_factory=llt.make_llt_primitives,
        optimize_impurity=impurity.ext_inf_gain,
        stop_condition=None,
        redo_after_failed=1,
        optimizer_args=None,
        times=None,
        fallback_impurity=impurity.inf_gain,
        log=False,
    ):
        self.depth = depth
        self.primitive_factory = primitive_factory
        self.optimize_impurity = optimize_impurity
        self.fallback_impurity = fallback_impurity
        if stop_condition is None:
            self.stop_condition = [perfect_stop]
        else:
            self.stop_condition = stop_condition
        if optimizer_args is None:
            optimizer_args = {}
        self.optimizer_args = optimizer_args
        self.times = times
        self.interpolate = times is not None
        if self.interpolate and len(self.times) > 1:
            self.tinter = self.times[1] - self.times[0]
        else:
            self.tinter = None
        self.tree = None
        self.redo_after_failed = redo_after_failed
        self._partial_add = 0
        self.log = log
        if "workers" not in self.optimizer_args:
            self.pool = Pool(initializer=_pool_initializer)

            def pool_map(func, iterable):
                try:
                    return self.pool.map_async(func, iterable).get(timeout=120)
                except KeyboardInterrupt:
                    self.pool.terminate()
                    self.pool.join()
                    raise KeyboardInterrupt()

            self.pool_map = pool_map
            self.optimizer_args["workers"] = self.pool_map

    def __del__(self):
        if hasattr(self, "pool"):
            self.pool.terminate()
            self.pool.join()

    def __exit__(self):
        if hasattr(self, "pool"):
            self.pool.terminate()
            self.pool.join()

    def fit(self, traces, disp=False):
        np.seterr(all="ignore")
        self.tree = self._lltinf(traces, None, self.depth, disp=disp)
        return self

    def fit_partial(self, traces, disp=False):
        if self.tree is None:
            return self.fit(traces, disp=disp)
        else:
            preds = self.predict(traces.signals)
            failed = set()
            for i in range(len(preds)):
                leaf = self.tree.add_signal(traces.signals[i],
                                            traces.labels[i], self.interpolate,
                                            self.tinter)
                if preds[i] != traces.labels[i]:
                    failed.add(leaf)

            # logger.debug("Failed set: {}".format(failed))

            self._partial_add += len(failed)
            if self._partial_add // self.redo_after_failed > 0:
                # logger.debug("Redoing tree")
                self._partial_add = 0
                return self.fit(self.tree.traces, disp=disp)
            else:
                for leaf in failed:
                    # TODO don't redo whole node, only leaf
                    tree = self._lltinf(
                        leaf.traces,
                        leaf.robustness,
                        self.depth - leaf.level(),
                        disp=disp,
                    )
                    old_tree = leaf.copy()
                    leaf.set_tree(tree)

                # FIXME only for perfect_stop
                preds = self.predict(traces.signals)
                if not np.array_equal(preds, traces.labels):
                    self._partial_add = 0
                    return self.fit(self.tree.traces, disp=disp)
                return self

    def predict(self, signals):
        if self.tree is not None:
            return np.array([
                self.tree.classify(s, self.interpolate, self.tinter)
                for s in signals
            ])
        else:
            raise ValueError("Model not fit")

    def get_formula(self):
        if self.tree is not None:
            return self.tree.get_formula()
        else:
            raise ValueError("Model not fit")

    def _debug(self, *args):
        if self.log:
            logger.debug(*args)

    def _lltinf(self, traces, rho, depth, disp=False, override_impurity=None):
        """Recursive call for the decision tree construction.

        See lltinf for information on similar arguments.

        rho : list of numerics
            List of robustness values for each trace up until the current node
        depth : integer
                Maximum depth to be reached. Decrements for each recursive call
        """
        # Stopping condition
        if any(
            [stop(self, traces, rho, depth) for stop in self.stop_condition]):
            return None

        # Find primitive using impurity measure
        self._debug(
            f"Creating primitives at depth {depth} over {len(traces)} traces")
        primitives = self.primitive_factory(traces.signals, traces.labels)
        if override_impurity is None:
            impurity = self.optimize_impurity
        else:
            impurity = override_impurity
        self._debug(
            f"Finding best primitive at depth {depth} over {len(traces)} traces"
        )
        primitive, impurity = _find_best_primitive(
            traces,
            primitives,
            rho,
            impurity,
            disp,
            self.optimizer_args,
            times=self.times,
            interpolate=self.interpolate,
            tinter=self.tinter,
        )
        if disp:
            print("Best: {} ({})".format(primitive, impurity))
        self._debug(f"Best primitive found: {primitive} (imp: {impurity})")

        # Classify using best primitive and split into groups
        prim_rho = [
            primitive.score(model)
            for model in traces.models(self.interpolate, self.tinter)
        ]
        if rho is None:
            rho = [np.inf for i in traces.labels]
        tree = DTree(primitive, traces, rho)

        def split(prim_rho):
            sat, unsat = [], []
            for i, rho in enumerate(prim_rho):
                if rho >= 0:
                    sat.append(i)
                else:
                    unsat.append(i)

            return sat, unsat

        # [prim_rho, rho, signals, label]
        # sat_, unsat_ = split_groups(
        #     list(zip(prim_rho, rho, *traces.as_list())), lambda x: x[0] >= 0
        # )
        sat_, unsat_ = split(prim_rho)
        self._debug(f"Split: {len(sat_)}/{len(unsat_)}")

        # pure_wrong = all([t[3] <= 0 for t in sat_]) or all([t[3] >= 0 for t in unsat_])
        # pure_right = all([t[3] >= 0 for t in sat_]) or all([t[3] <= 0 for t in unsat_])
        sat_right = len([i for i in sat_ if traces.labels[i] >= 0])
        sat_wrong = len(sat_) - sat_right
        unsat_right = len([i for i in unsat_ if traces.labels[i] <= 0])
        unsat_wrong = len(unsat_) - unsat_right
        # Switch sat and unsat if labels are wrong. No need to negate prim rho since
        # we use it in absolute value later
        if sat_right * unsat_right == 0 or (sat_wrong * unsat_wrong != 0
                                            and sat_right < unsat_wrong):
            self._debug(f"Inverting primitive")

            sat_, unsat_ = unsat_, sat_
            tree.primitive.negate()

        # No further classification possible
        if len(sat_) == 0 or len(unsat_) == 0:
            self._debug("No further classification possible")
            if override_impurity is None:
                self._debug("Attempting to classify using impurity fallback")
                return self._lltinf(
                    traces,
                    rho,
                    depth,
                    disp=disp,
                    override_impurity=self.fallback_impurity,
                )
            else:
                return None

        # Redo data structures
        sat_traces, unsat_traces = [
            traces.subset(traces, idxs) for idxs in [sat_, unsat_]
        ]
        sat_rho, unsat_rho = [
            np.amin(
                [np.abs([prim_rho[i] for i in idxs]), [rho[i]
                                                       for i in idxs]], 0)
            for idxs in [sat_, unsat_]
        ]
        # sat, unsat = [
        #     (Traces(*group[2:]), np.amin([np.abs(group[0]), group[1]], 0))
        #     for group in [list(zip(*sat_)), list(zip(*unsat_))]
        # ]

        # Recursively build the tree
        tree.left = self._lltinf(sat_traces, sat_rho, depth - 1, disp=disp)
        tree.right = self._lltinf(unsat_traces,
                                  unsat_rho,
                                  depth - 1,
                                  disp=disp)

        return tree

    def run(self,
            model,
            epoch=600,
            batchsize=16,
            learning_rate=0.0001,
            early_rejection=False,
            valid_interval=10,
            tag='',
            save_result=True,
            checkpoint='',
            pretrain=False,
            skip_train=False,
            validate_train=True,
            validate_valid=True,
            logdir='/data/public/rw/kaggle-data-science-bowl/logs/',
            **kwargs):
        self.set_network(model, batchsize)
        ds_train, ds_valid, ds_valid_full, ds_test = self.network.get_input_flow(
        )
        self.network.build()
        print(HyperParams.get().__dict__)

        net_output = self.network.get_output()
        net_loss = self.network.get_loss()

        global_step = tf.Variable(0, trainable=False)
        learning_rate_v, train_op = self.network.get_optimize_op(
            global_step=global_step, learning_rate=learning_rate)

        best_loss_val = 999999
        best_miou_val = 0.0
        name = '%s_%s_lr=%.8f_epoch=%d_bs=%d' % (
            tag if tag else datetime.datetime.now().strftime("%y%m%dT%H%M%f"),
            model,
            learning_rate,
            epoch,
            batchsize,
        )
        model_path = os.path.join(KaggleSubmission.BASEPATH, name, 'model')
        best_ckpt_saver = BestCheckpointSaver(save_dir=model_path,
                                              num_to_keep=100,
                                              maximize=True)

        saver = tf.train.Saver()
        m_epoch = 0

        # initialize session
        self.init_session()

        # tensorboard
        tf.summary.scalar('loss', net_loss, collections=['train', 'valid'])
        s_train = tf.summary.merge_all('train')
        s_valid = tf.summary.merge_all('valid')
        train_writer = tf.summary.FileWriter(logdir + name + '/train',
                                             self.sess.graph)
        valid_writer = tf.summary.FileWriter(logdir + name + '/valid',
                                             self.sess.graph)

        logger.info('initialization+')
        if not checkpoint:
            self.sess.run(tf.global_variables_initializer())

            if pretrain:
                global_vars = tf.global_variables()

                from tensorflow.python import pywrap_tensorflow
                reader = pywrap_tensorflow.NewCheckpointReader(
                    self.network.get_pretrain_path())
                var_to_shape_map = reader.get_variable_to_shape_map()
                saved_vars = list(var_to_shape_map.keys())

                var_list = [
                    x for x in global_vars
                    if x.name.replace(':0', '') in saved_vars
                ]
                var_list = [x for x in var_list if 'logit' not in x.name]
                logger.info('pretrained weights(%d) loaded : %s' %
                            (len(var_list), self.network.get_pretrain_path()))

                pretrain_loader = tf.train.Saver(var_list)
                pretrain_loader.restore(self.sess,
                                        self.network.get_pretrain_path())
        elif checkpoint == 'best':
            path = get_best_checkpoint(model_path)
            saver.restore(self.sess, path)
            logger.info('restored from best checkpoint, %s' % path)
        elif checkpoint == 'latest':
            path = tf.train.latest_checkpoint(model_path)
            saver.restore(self.sess, path)
            logger.info('restored from latest checkpoint, %s' % path)
        else:
            saver.restore(self.sess, checkpoint)
            logger.info('restored from checkpoint, %s' % checkpoint)

        step = self.sess.run(global_step)
        start_e = (batchsize * step) // len(CellImageDataManagerTrain.LIST)

        logger.info('training started+')
        if epoch > 0 and not skip_train:
            try:
                losses = []
                for e in range(start_e, epoch):
                    loss_val_avg = []
                    train_cnt = 0
                    for dp_train in ds_train.get_data():
                        _, loss_val, summary_train = self.sess.run(
                            [train_op, net_loss, s_train],
                            feed_dict=self.network.get_feeddict(
                                dp_train, True))
                        loss_val_avg.append(loss_val)
                        train_cnt += 1

                    step, lr = self.sess.run([global_step, learning_rate_v])
                    loss_val_avg = sum(loss_val_avg) / len(loss_val_avg)
                    logger.info(
                        'training %d epoch %d step, lr=%.8f loss=%.4f train_iter=%d'
                        % (e + 1, step, lr, loss_val_avg, train_cnt))
                    losses.append(loss_val)
                    train_writer.add_summary(summary_train, global_step=step)

                    if early_rejection and len(losses) > 100 and losses[-100] * 1.05 < loss_val_avg:
                        logger.info('not improved, stop at %d' % e)
                        break

                    # early rejection
                    if early_rejection and ((e == 50 and loss_val > 0.5) or
                                            (e == 200 and loss_val > 0.2)):
                        logger.info('not improved training loss, stop at %d' %
                                    e)
                        break

                    m_epoch = e
                    avg = 10.0
                    if loss_val < 0.20 and (e + 1) % valid_interval == 0:
                        avg = []
                        for _ in range(5):
                            ds_valid.reset_state()
                            ds_valid_d = ds_valid.get_data()
                            for dp_valid in ds_valid_d:
                                loss_val, summary_valid = self.sess.run(
                                    [net_loss, s_valid],
                                    feed_dict=self.network.get_feeddict(
                                        dp_valid, False))

                                avg.append(loss_val)
                            ds_valid_d.close()

                        avg = sum(avg) / len(avg)
                        logger.info('validation loss=%.4f' % (avg))
                        if best_loss_val > avg:
                            best_loss_val = avg
                        valid_writer.add_summary(summary_valid,
                                                 global_step=step)

                    if avg < 0.16 and e >= 100 and (e + 1) % valid_interval == 0:
                        cnt_tps = np.zeros(len(thr_list), dtype=np.int32)
                        cnt_fps = np.zeros(len(thr_list), dtype=np.int32)
                        cnt_fns = np.zeros(len(thr_list), dtype=np.int32)
                        pool_args = []
                        ds_valid_full.reset_state()
                        ds_valid_full_d = ds_valid_full.get_data()
                        for idx, dp_valid in tqdm(
                                enumerate(ds_valid_full_d),
                                desc='validate using the iou metric',
                                total=len(CellImageDataManagerValid.LIST)):
                            image = dp_valid[0]
                            inference_result = self.network.inference(
                                self.sess, image, cutoff_instance_max=0.9)
                            instances, scores = inference_result[
                                'instances'], inference_result['scores']
                            pool_args.append(
                                (thr_list, instances, dp_valid[2]))
                        ds_valid_full_d.close()

                        pool = Pool(processes=8)
                        cnt_results = pool.map(do_get_multiple_metric,
                                               pool_args)
                        pool.close()
                        pool.join()
                        pool.terminate()
                        for cnt_result in cnt_results:
                            cnt_tps = cnt_tps + cnt_result[0]
                            cnt_fps = cnt_fps + cnt_result[1]
                            cnt_fns = cnt_fns + cnt_result[2]

                        ious = np.divide(cnt_tps, cnt_tps + cnt_fps + cnt_fns)
                        mIou = np.mean(ious)
                        logger.info('validation metric: %.5f' % mIou)
                        if best_miou_val < mIou:
                            best_miou_val = mIou
                        best_ckpt_saver.handle(
                            mIou, self.sess,
                            global_step)  # save & keep best model

                        # early rejection by mIou
                        if early_rejection and e > 50 and best_miou_val < 0.15:
                            break
                        if early_rejection and e > 100 and best_miou_val < 0.25:
                            break
            except KeyboardInterrupt:
                logger.info('interrupted. stop training, start to validate.')

        try:
            chk_path = get_best_checkpoint(model_path,
                                           select_maximum_value=True)
            if chk_path:
                logger.info(
                    'training is done. Start to evaluate the best model. %s' %
                    chk_path)
                saver.restore(self.sess, chk_path)
        except Exception as e:
            logger.warning('error while loading the best model:' + str(e))

        # show sample in train set : show_train > 0
        kaggle_submit = KaggleSubmission(name)
        if validate_train in [True, 'True', 'true']:
            logger.info('Start to test on training set.... (may take a while)')
            train_metrics = []
            for single_id in tqdm(CellImageDataManagerTrain.LIST[:20],
                                  desc='training set test'):
                result = self.single_id(None,
                                        None,
                                        single_id,
                                        set_type='train',
                                        show=False,
                                        verbose=False)
                image = result['image']
                labels = result['labels']
                instances = result['instances']
                score = result['score']
                score_desc = result['score_desc']

                img_vis = Network.visualize(image, labels, instances, None)
                kaggle_submit.save_train_image(single_id,
                                               img_vis,
                                               score=score,
                                               score_desc=score_desc)
                train_metrics.append(score)
            logger.info('trainset validation ends. score=%.4f' %
                        np.mean(train_metrics))

        # show sample in valid set : show_valid > 0
        if validate_valid in [True, 'True', 'true']:
            logger.info(
                'Start to test on validation set.... (may take a while)')
            valid_metrics = []
            for single_id in tqdm(CellImageDataManagerValid.LIST,
                                  desc='validation set test'):
                result = self.single_id(None,
                                        None,
                                        single_id,
                                        set_type='train',
                                        show=False,
                                        verbose=False)
                image = result['image']
                labels = result['labels']
                instances = result['instances']
                score = result['score']
                score_desc = result['score_desc']

                img_vis = Network.visualize(image, labels, instances, None)
                kaggle_submit.save_valid_image(single_id,
                                               img_vis,
                                               score=score,
                                               score_desc=score_desc)
                kaggle_submit.valid_instances[single_id] = (
                    instances, result['instance_scores'])
                valid_metrics.append(score)
            logger.info('validation ends. score=%.4f' % np.mean(valid_metrics))

        # show sample in test set
        logger.info('saving...')
        if save_result:
            for i, single_id in tqdm(
                    enumerate(CellImageDataManagerTest.LIST),
                    total=len(CellImageDataManagerTest.LIST)):  # TODO
                try:
                    result = self.single_id(None, None, single_id, 'test',
                                            False, False)
                except Exception as e:
                    logger.warning('single_id=%s err=%s' % (single_id, str(e)))
                    continue
                image = result['image']
                instances = result['instances']
                img_h, img_w = image.shape[:2]

                img_vis = Network.visualize(image, None, instances, None)

                # save to submit
                instances = Network.resize_instances(instances, (img_h, img_w))
                kaggle_submit.save_image(single_id, img_vis)
                kaggle_submit.test_instances[single_id] = (
                    instances, result['instance_scores'])
                kaggle_submit.add_result(single_id, instances)
                # for single_id in tqdm(CellImageDataManagerTest.LIST[1120:], desc='test set evaluation'):
                #     result = self.single_id(None, None, single_id, set_type='test', show=False, verbose=False)

                # temporal saving
                if i % 500 == 0:
                    kaggle_submit.save()
            kaggle_submit.save()
        logger.info(
            'done. epoch=%d best_loss_val=%.4f best_mIOU=%.4f name= %s' %
            (m_epoch, best_loss_val, best_miou_val, name))
        return best_miou_val, name
Beispiel #36
0
def batch(frames, diameter, output=None, meta=None, processes=1,
          after_locate=None, **kwargs):
    """Locate Gaussian-like blobs of some approximate size in a set of images.

    Preprocess the image by performing a band pass and a threshold.
    Locate all peaks of brightness, characterize the neighborhoods of the peaks
    and take only those with given total brightness ("mass"). Finally,
    refine the positions of each peak.

    Parameters
    ----------
    frames : list (or iterable) of images
        The frames to process.
    diameter : odd integer or tuple of odd integers
        This may be a single number or a tuple giving the feature's
        extent in each dimension, useful when the dimensions do not have
        equal resolution (e.g. confocal microscopy). The tuple order is the
        same as the image shape, conventionally (z, y, x) or (y, x). The
        number(s) must be odd integers. When in doubt, round up.
    output : {None, trackpy.PandasHDFStore, SomeCustomClass}
        If None, return all results as one big DataFrame. Otherwise, pass
        results from each frame, one at a time, to the put() method
        of whatever class is specified here.
    meta : filepath or file object, optional
        If specified, information relevant to reproducing this batch is saved
        as a YAML file, a plain-text machine- and human-readable format.
        By default, this is None, and no file is saved.
    processes : integer or "auto", optional
        The number of processes to use in parallel. If <= 1, multiprocessing is
        disabled. If "auto", the number returned by `os.cpu_count()`` is used.
    after_locate : function, optional
        Specify a custom function to apply to the detected features in each
        processed frame. It must accept the following arguments:

        - ``frame_no``: an integer specifying the number of the current frame.
        - ``features``: a DataFrame containing the detected features.

        Furthermore it must return a DataFrame like ``features``.
    **kwargs :
        Keyword arguments that are passed to the wrapped `trackpy.locate`.
        Refer to its docstring for further details.

    Returns
    -------
    DataFrame([x, y, mass, size, ecc, signal])
        where mass means total integrated brightness of the blob,
        size means the radius of gyration of its Gaussian-like profile,
        and ecc is its eccentricity (0 is circular).

    See Also
    --------
    locate : performs location on a single image

    Notes
    -----
    This is a convenience function that wraps `trackpy.locate` (see its
    docstring for further details) and allows batch processing of multiple
    frames, optionally in parallel by using multiprocessing.
    """
    if "raw_image" in kwargs:
        raise KeyError("the argument `raw_image` musn't be in `kwargs`, it is "
                       "provided internally by `frames`")
    # Add required keyword argument
    kwargs["diameter"] = diameter

    if meta:
        # Gather meta information and save as YAML in current directory.
        try:
            source = frames.filename
        except AttributeError:
            source = None
        meta_info = dict(
            timestamp=pd.datetime.utcnow().strftime('%Y-%m-%d-%H%M%S'),
            trackpy_version=trackpy.__version__,
            source=source,
            **kwargs
        )
        if isinstance(meta, six.string_types):
            with open(meta, 'w') as file_obj:
                record_meta(meta_info, file_obj)
        else:
            # Interpret meta to be a file handle.
            record_meta(meta_info, meta)

    # Prepare wrapped function for mapping to `frames`
    curried_locate = partial(locate, **kwargs)

    # Handle & validate argument `processes`
    if processes == "auto":
        processes = None  # Is replaced with `os.cpu_count` in Pool
    elif not isinstance(processes, six.integer_types):
        raise TypeError("`processes` must either be an integer or 'auto', "
                        "was type {}".format(type(processes)))

    if processes is None or processes > 1:
        # Use multiprocessing
        pool = Pool(processes=processes)
        map_func = pool.imap
    else:
        pool = None
        map_func = map

    if after_locate is None:
        def after_locate(frame_no, features):
            return features

    try:
        all_features = []
        for i, features in enumerate(map_func(curried_locate, frames)):
            image = frames[i]
            if hasattr(image, 'frame_no') and image.frame_no is not None:
                frame_no = image.frame_no
                # If this works, locate created a 'frame' column.
            else:
                frame_no = i
                features['frame'] = i  # just counting iterations
            features = after_locate(frame_no, features)

            logger.info("Frame %d: %d features", frame_no, len(features))
            if len(features) > 0:
                # Store if features were found
                if output is None:
                    all_features.append(features)
                else:
                    output.put(features)
    finally:
        if pool:
            # Ensure correct termination of Pool
            pool.terminate()

    if output is None:
        if len(all_features) > 0:
            return pandas_concat(all_features).reset_index(drop=True)
        else:  # return empty DataFrame
            warnings.warn("No maxima found in any frame.")
            return pd.DataFrame(columns=list(features.columns) + ['frame'])
    else:
        return output
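
# A minimal usage sketch for the batch() wrapper above, assuming trackpy-style
# frames loaded with pims; the file name 'video.tif', the diameter 11 and the
# minmass threshold are illustrative placeholders, not values from the example.
import pims
import trackpy as tp

frames = pims.open('video.tif')            # lazy sequence of images
features = tp.batch(frames, diameter=11,   # odd integer, as required above
                    processes='auto',      # pool size falls back to os.cpu_count()
                    minmass=100)           # extra kwargs are forwarded to locate()
print(features.head())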
Beispiel #37
0
def MP_sed_fit(spec_or_photo,
               igals,
               sim='lgal',
               noise='none',
               method='ifsps',
               model='emulator',
               nthreads=1,
               nwalkers=100,
               burnin=100,
               niter=1000,
               maxiter=200000,
               overwrite=False,
               postprocess=False,
               justplot=False):
    ''' multiprocessing wrapper for fit_spectra and fit_photometry. This does *not* parallelize
    the MCMC sampling of individual fits but rather runs multiple fits simultaneously.
    
    :param spec_or_photo: 
        fit spectra or photometry 

    :param igals: 
        array/list of spectral_challenge galaxy indices

    :param noise: 
        If 'none', fit noiseless spectra. 
        If 'bgs1'...'bgs8', fit BGS-like spectra. (default: 'none') 

    :param dust: 
        If True, fit the spectra w/ dust using a model with dust 
        If False, fit the spectra w/o dust using a model without dust. 
        (default: False) 

    :param nthreads: 
        Number of threads. If nthreads == 1, just runs fit_spectra
    '''
    args = igals  # galaxy indices

    kwargs = {
        'sim': sim,
        'noise': noise,
        'method': method,
        'model': model,
        'nwalkers': nwalkers,
        'burnin': burnin,
        'niter': niter,
        'maxiter': maxiter,
        'opt_maxiter': 1000,
        'overwrite': overwrite,
        'postprocess': postprocess,
        'justplot': justplot
    }
    if spec_or_photo == 'spec':
        fit_func = fit_spectra
    elif spec_or_photo == 'photo':
        fit_func = fit_photometry
    elif spec_or_photo == 'specphoto':
        fit_func = fit_spectrophotometry

    if nthreads > 1:
        pool = Pool(processes=nthreads)
        pool.map(partial(fit_func, **kwargs), args)
        pool.close()
        pool.terminate()
        pool.join()
    else:
        # single thread, loop over
        for igal in args:
            fit_func(igal, **kwargs)
    return None
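
# A hedged usage sketch for MP_sed_fit() above; the galaxy indices and keyword
# values below are illustrative only and assume the surrounding provabgs-style
# setup (fit_spectra / fit_photometry etc.) is importable.
igals = range(10)                          # spectral_challenge galaxy indices
MP_sed_fit('photo', igals, sim='lgal', noise='none', method='ifsps',
           model='emulator', nthreads=4,   # runs 4 fits at a time in a Pool
           nwalkers=100, burnin=100, niter=1000, overwrite=False)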
Beispiel #38
0
class TaskManager(WithLogger):
    """A Task manager."""

    def __init__(
        self,
        nb_workers: int = 1,
        is_lazy_pool_start: bool = True,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Initialize the task manager.

        :param nb_workers: the number of worker processes.
        :param is_lazy_pool_start: option to postpone pool creation till the first enqueue_task called.
        """
        WithLogger.__init__(self, logger)
        self._nb_workers = nb_workers
        self._is_lazy_pool_start = is_lazy_pool_start
        self._pool = None  # type: Optional[Pool]
        self._stopped = True
        self._lock = threading.Lock()

        self._task_enqueued_counter = 0
        self._results_by_task_id = {}  # type: Dict[int, Any]

    @property
    def is_started(self) -> bool:
        """
        Get started status of TaskManager.

        :return: bool
        """
        return not self._stopped

    @property
    def nb_workers(self) -> int:
        """
        Get the number of workers.

        :return: int
        """
        return self._nb_workers

    def enqueue_task(
        self, func: Callable, args: Sequence = (), kwds: Optional[Dict[str, Any]] = None
    ) -> int:
        """
        Enqueue a task with the executor.

        :param func: the callable instance to be enqueued
        :param args: the positional arguments to be passed to the function.
        :param kwds: the keyword arguments to be passed to the function.
        :return: the task id to get the result.
        :raises ValueError: if the task manager is not running.
        """
        with self._lock:
            if self._stopped:
                raise ValueError("Task manager not running.")

            if not self._pool and self._is_lazy_pool_start:
                self._start_pool()

            self._pool = cast(Pool, self._pool)
            task_id = self._task_enqueued_counter
            self._task_enqueued_counter += 1
            async_result = self._pool.apply_async(
                func, args=args, kwds=kwds if kwds is not None else {}
            )
            self._results_by_task_id[task_id] = async_result
            return task_id

    def get_task_result(self, task_id: int) -> AsyncResult:
        """
        Get the result from a task.

        :return: async result for task_id
        """
        task_result = self._results_by_task_id.get(
            task_id, None
        )  # type: Optional[AsyncResult]
        if task_result is None:
            raise ValueError("Task id {} not present.".format(task_id))

        return task_result

    def start(self) -> None:
        """
        Start the task manager.

        :return: None
        """
        with self._lock:
            if self._stopped is False:
                self.logger.debug("Task manager already running.")
            else:
                self.logger.debug("Start the task manager.")
                self._stopped = False
                if not self._is_lazy_pool_start:
                    self._start_pool()

    def stop(self) -> None:
        """
        Stop the task manager.

        :return: None
        """
        with self._lock:
            if self._stopped is True:
                self.logger.debug("Task manager already stopped.")
            else:
                self.logger.debug("Stop the task manager.")
                self._stopped = True
                self._stop_pool()

    def _start_pool(self) -> None:
        """
        Start internal task pool.

        Only one pool will be created.

        :return: None
        """
        if self._pool:
            self.logger.debug("Pool was already started!")
            return
        self._pool = Pool(self._nb_workers, initializer=init_worker)

    def _stop_pool(self) -> None:
        """
        Stop internal task pool.

        :return: None
        """
        if not self._pool:
            self.logger.debug("Pool is not started!.")
            return

        self._pool = cast(Pool, self._pool)
        self._pool.terminate()
        self._pool.join()
        self._pool = None
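
# Minimal usage sketch for the TaskManager above; `square` is a placeholder
# worker function, and the import block around the class is assumed.
def square(x: int) -> int:
    return x * x

manager = TaskManager(nb_workers=2, is_lazy_pool_start=True)
manager.start()                                  # pool itself starts on first enqueue
task_id = manager.enqueue_task(square, args=(7,))
print(manager.get_task_result(task_id).get())    # -> 49
manager.stop()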
Beispiel #39
0
    def run(cls, reset=False, num_threads=1):
        # -------------------- MCMC ----------------------------
        cls.ndim = cls.mean_model.ndim + cls.gp_model.ndim
        nwalkers = cls.ndim * 4
        nsteps = cls.settings['num_steps']
        # By default we define the initial params from the priors
        set_params = True
        # Check backend status and iterations and compare to config. Reset if flag is true
        if cls.backend is not None:
            # If we have a backend file to fetch the sampler from
            filename = Path(cls.backend.filename)
            if filename.is_file():
                # If we want to reset it, just clear the sampler and get init sample from prior
                if reset:
                    log.info("Resetting the backend sampler")
                    # backend.reset(nwalkers, ndim) # TODO: This line is not working as intended
                    filename.unlink()  # TODO: remove the file; hack because reset is not working
                # Else, init the backend and check if we have more iterations in the sampler than the desired ones
                else:
                    # If we have, stop the code
                    if cls.backend.iteration >= nsteps:
                        log.warn(
                            "Skipping run. Backend number of iterations greater than settings"
                        )
                        return
                    # Otherwise, calculate the remaining steps and continue from there
                    else:
                        nsteps = nsteps - cls.backend.iteration
                        set_params = False

        # If the initial params are not taken from the backend init them from the prior
        init_params = None
        if set_params:
            # Sample priors to get initial values for all walkers
            if cls.gp_model is not None and cls.mean_model is not None:
                init_gp_params = cls.gp_model.sample_prior(num=nwalkers)
                init_mean_params = cls.mean_model.sample_prior(num=nwalkers)
                init_params = np.hstack([init_gp_params, init_mean_params])
            elif cls.mean_model is not None:
                init_params = cls.mean_model.sample_prior(num=nwalkers)
            elif cls.gp_model is not None:
                init_params = cls.gp_model.sample_prior(num=nwalkers)

        # Single or Multiprocessing always uses pool as the init_worker can handle the system interrupts
        pool = Pool(num_threads, cls.init_worker)
        sampler = emcee.EnsembleSampler(nwalkers,
                                        cls.ndim,
                                        cls.lnlike_func,
                                        pool=pool,
                                        backend=cls.backend)

        # Run mcmc
        log.info(
            f"Running MCMC on {num_threads} processes for {nsteps} iterations")
        try:
            sampler.run_mcmc(init_params, nsteps, progress=True)
        except KeyboardInterrupt:
            log.warn(f"Emcee was stopped by user input")
            pool.terminate()
            pool.join()
            sys.exit()

        # TODO: get_chain is the new method but it has different dims
        cls.chain = sampler.chain.copy()
        cls.log_prob = sampler.get_log_prob().copy()

        # Save data # TODO: Need to update posterior name and then update filenames
        if cls.settings['save']:
            log.info(f"Saving chain and log_prob")
            with open(cls.datadir / "chain.pk", "wb") as f:
                pickle.dump(sampler.chain, f, protocol=-1)
            with open(cls.datadir / "posterior.pk", "wb") as f:
                pickle.dump(sampler.get_log_prob(), f, protocol=-1)
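
# Self-contained sketch of handing a multiprocessing Pool to emcee, the pattern
# the classmethod above wires up; the toy log-probability and walker counts are
# illustrative, not taken from the example.
import numpy as np
import emcee
from multiprocessing import Pool

def log_prob(theta):
    return -0.5 * np.sum(theta ** 2)        # unnormalised standard-normal log density

if __name__ == "__main__":
    nwalkers, ndim = 8, 2
    p0 = np.random.randn(nwalkers, ndim)
    with Pool(2) as pool:
        sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, pool=pool)
        sampler.run_mcmc(p0, 100, progress=False)
    print(sampler.get_chain().shape)        # (100, 8, 2)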
Beispiel #40
0
def benchmark_ensembles(print_to_file=False):

    # load all possible models and datasets
    datasets = __load_datasets()
    models = Classifier.all_models()
    voting_systems = VotingSystem.all_voting_systems()
    ensemble_types = Ensemble.all_ensemble_types()
    
    # output format
    if print_to_file == True:
        output_string_format = "{0},{1},{8},{2},{3},{4},{5},{6},{7}"
    else:
        output_string_format = "{0: <15}\t{1: <28}\t{8: <15}{2: <13}\t{3: <18}\t{4: <28}\t{5: <22}\t{6: <15}\t{7: <22}"

    # print output header
    print(output_string_format
      .format('score','dataset','dataset_size','number_of_classes','model','ensemble','ensemble_size','voting_system', 'feature_count'))
    
    
    ensamble_sizes = [1, 2, 4, 6, 8, 12, 15, 20, 25, 30, 40, 50, 60, 70, 100][::-1]

    process_jobs_args = []
    l = Lock()

    for classifiers_in_ensamble in ensamble_sizes:

        for dataset in datasets:
            
            # load dataset
            X, Y = dataset.load()
            # dataset info
            dataset_size = X.shape[0]
            classes_count = np.unique(Y).size
            
            # Split dataset into kfold datasets
            kfold_labels = __kfold_labels(Y)

            # Check if every class is in every set after kfold
            for train_index, test_index in kfold_labels:
                Y_train, Y_test = Y[train_index], Y[test_index]
                assert np.unique(Y_train).size == classes_count
            
            # evaluate models
            for model in models:
                    
                # score ensembles based on current model
                for voting_system in voting_systems:
                    
                    for ensemble_type in ensemble_types:

                        feature_labels = None
                        if ensemble_type == RandomSubspace:
                            feature_labels = __load_feature_labels(dataset.path())

                        # create ensemble
                        ensemble = ensemble_type(voting_system, type(model), classifiers_in_ensamble)
                        
                        job_args = ensemble, X, Y, kfold_labels, dataset, model, output_string_format, classifiers_in_ensamble, voting_system.name(), feature_labels
                        process_jobs_args.append(job_args)
                        
    pool = Pool(initializer=__init_proc, initargs=(l, ), processes=None)
    pool.map(__model_score_job, process_jobs_args)
    pool.terminate()
    pool = None
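
# The initializer/initargs pattern above is the usual way to hand a
# multiprocessing.Lock to pool workers; a self-contained sketch follows
# (the names _init_worker/_job are illustrative, not from the benchmark code).
from multiprocessing import Pool, Lock

def _init_worker(lock):
    global _print_lock
    _print_lock = lock          # each worker process keeps its own reference

def _job(i):
    with _print_lock:           # serialise access to the shared resource
        print("job", i)

if __name__ == "__main__":
    lock = Lock()
    with Pool(processes=2, initializer=_init_worker, initargs=(lock,)) as pool:
        pool.map(_job, range(4))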
Beispiel #41
0
class Executor(QueueListener):
    '''smartbus JSON RPC request executor

    Executes RPC requests using a process pool.

    :param queue_maxsize: maximum size of the task queue.
        Defaults to 0, which means unlimited.

    :param pool_processes: maximum number of processes in the executor pool.
        Defaults to None, which means the number of CPU cores is used as the maximum.

    :param pool_maxtasksperchild: maximum number of tasks a pool worker may execute.
        Defaults to None, which means unlimited. Beyond that value the child process is
        restarted. Only effective for the process-pool model.

    After a smartbus request is received, data must be put onto this queue in the
    format ``client, pack_info, txt``: the smartbus client instance that received
    the data, the packet's extra information, and the data text.

    Once data is received, an instance of this class parses it as JSON-RPC, executes
    the JSON RPC request, and finally returns the result through the smartbus client.
    The returned data is a string conforming to the JSON RPC standard.
    '''

    def __init__(self, queue_maxsize=0, pool_processes=None, pool_maxtasksperchild=None):
        self._pool_kdargs = dict(
            processes=pool_processes,
            initializer=_subproc_init,
            initargs=(
                globalvars.prog_args,
                globalvars.main_logging_queue,
                logging.root.level
            ),
            maxtasksperchild=pool_maxtasksperchild
        )
        self._pool = None
        if PY3K:
            super().__init__(queue.Queue(queue_maxsize))
        else:
            super(Executor, self).__init__(queue.Queue(queue_maxsize))
        if PY3K:
            self._logger = logging.getLogger(self.__class__.__qualname__)
        else:
            self._logger = logging.getLogger(self.__class__.__name__)

    def put(self, client, pack_info, txt):
        self.queue.put((client, pack_info, txt, time.time()))

    def start(self):
        self._logger.info('start() >>>. pool arguments: %s', self._pool_kdargs)
        self._pool = Pool(**self._pool_kdargs)
        if PY3K:
            super().start()
        else:
            super(Executor, self).start()
        self._logger.info('start() <<<')

    def stop(self):
        self._logger.info('stop() >>>')
        super().stop()
        self._logger.debug('pool.terminate() ...')
        self._pool.terminate()
        self._logger.debug('pool.join() ...')
        self._pool.join()
        self._pool = None
        self._logger.info('stop() <<<')

    def handle(self, record):
        if globalvars.prog_args.verbose:
            self._logger.debug('handle(record=%s)', record)
        request = None
        try:
            client, pack_info, txt, begin_time = record
            try:
                request, _, _ = jsonrpc.parse(txt)
            except Exception as e:
                if globalvars.prog_args.verbose:
                    self._logger.error(
                        'JSONRPC parse error: %s %s', type(e), e)
            if request:
                _id = request.get('id')
                _method = request['method']
                _args = request['args']
                _kwargs = request['kwargs']

                def _callback(result):
                    try:
                        if isinstance(result, Exception):  # if the result is an exception, return an error response and re-raise it
                            error = result
                            if _id:  # if there is an RPC ID, an error response must be returned
                                if isinstance(error, jsonrpc.Error):  # handle jsonrpc.Error exceptions
                                    response = error.to_dict()
                                    response['id'] = _id
                                else:  # handle any other exception
                                    response = {
                                        'jsonrpc': jsonrpc.jsonrpc_version,
                                        'id': _id,
                                        'error': {
                                            'code': -32500,
                                            'message': '{} {}'.format(type(error), error),
                                            'data': None,
                                        }
                                    }
                                data = json.dumps(response)
                                client.sendNotify(pack_info.srcUnitId, pack_info.srcUnitClientId, None, _id, 0, settings.SMARTBUS_NOTIFY_TTL, data)
                            raise error  # re-raise the exception
                        if globalvars.prog_args.verbose:
                            self._logger.debug(
                                'call back:\n    result=%s %s\n    duration=%s\n    request=%s',
                                type(result), result, time.time() -
                                begin_time, record
                            )
                        if _id:  # if there is an RPC ID, the execution result must be returned
                            response = {
                                'jsonrpc': jsonrpc.jsonrpc_version,
                                'id': _id,
                                'result': result,
                            }
                            data = json.dumps(response)
                            client.sendNotify(pack_info.srcUnitId, pack_info.srcUnitClientId, None, _id, 0, settings.SMARTBUS_NOTIFY_TTL, data)
                    except Exception as e:
                        if globalvars.prog_args.verbose:
                            self._logger.exception(
                                'error occurred in handle._callback():\n    request=%s', record)
                        else:
                            self._logger.error(
                                'error occurred in handle._callback():\n    error: %s %s', type(e), e)
                pass  # end of _callback

                def _error_callback(error):
                    try:
                        # Python3.4 issue20980: In multiprocessing.pool,
                        # ExceptionWithTraceback should derive from Exception
                        if not isinstance(error, Exception):
                            error = error.exc
                        if globalvars.prog_args.verbose:
                            self._logger.exception(
                                'error callback:\n    duration=%s\n    request=%s:\n  %s %s',
                                time.time() - begin_time, record, type(error), error
                            )
                        else:
                            self._logger.error(
                                'error callback:\n    %s %s\n    duration=%s\n    request=%s\n  %s %s',
                                type(error), error, time.time() -
                                begin_time, record, type(error), error
                            )
                        if _id:  # if there is an RPC ID, an error response must be returned
                            if isinstance(error, jsonrpc.Error):  # JSONRPC error
                                response = error.to_dict()
                                response['id'] = _id
                            else:  # any other exception
                                response = {
                                    'jsonrpc': jsonrpc.jsonrpc_version,
                                    'id': _id,
                                    'error': {
                                        'code': -32500,
                                        'message': '{} {}'.format(type(error), error),
                                        'data': None,
                                    }
                                }
                            data = json.dumps(response)
                            client.sendNotify(pack_info.srcUnitId, pack_info.srcUnitClientId, None, _id, 0, settings.SMARTBUS_NOTIFY_TTL, data)
                    except Exception as e:
                        if globalvars.prog_args.verbose:
                            self._logger.exception(
                                'error occurred in handle._error_callback():\n    request=%s', record)
                        else:
                            self._logger.error(
                                'error occurred in handle._error_callback():\n    error=%s', e)
                pass  # end of _error_callback

                if globalvars.prog_args.verbose:
                    self._logger.debug('pool.apply_async(%s, %s, %s)', _method, _args, _kwargs)
                if sys.version_info[0] < 3:
                    self._pool.apply_async(
                        func=partial(_poolfunc, _method),
                        args=(_args, _kwargs),
                        callback=_callback
                    )
                else:
                    self._pool.apply_async(
                        func=partial(_poolfunc, _method),
                        args=(_args, _kwargs),
                        callback=_callback,
                        error_callback=_error_callback
                    )

        except Exception as e:
            if globalvars.prog_args.verbose:
                self._logger.exception(
                    'error occurred in handle():\n    request=%s', record)
            else:
                self._logger.error(
                    'error occurred in handle():\n    error: %s %s',
                    type(e), e)
Beispiel #42
0
    fmcmc = os.path.join('/global/cscratch1/sd/chahah/provabgs/raga/',
                         sample.replace('.fits', '.%i.hdf5' % igal))
    # run MCMC
    zeus_chain = desi_mcmc.run(
        wave_obs=w_obs,
        flux_obs=f_obs,
        flux_ivar_obs=i_obs,
        bands='desi',  # g, r, z
        photo_obs=photo_flux_i,
        photo_ivar_obs=photo_ivar_i,
        zred=zred_i,
        vdisp=0.,
        sampler='zeus',
        nwalkers=30,
        burnin=0,
        opt_maxiter=2000,
        niter=niter,
        progress=True,
        debug=True,
        writeout=fmcmc,
        overwrite=True)
    return None


pool = Pool(processes=n_cpu)
pool.map(partial(run_mcmc), np.arange(i0, i1 + 1))
pool.close()
pool.terminate()
pool.join()
Beispiel #43
0
class TaskPool(object):
    """Process Pool for processing tasks in parallel.

    :param limit: see :attr:`limit` attribute.
    :param logger: see :attr:`logger` attribute.


    .. attribute:: limit

        The number of processes that can run simultaneously.

    .. attribute:: logger

        The logger used for debugging.

    """

    def __init__(self, limit, logger=None):
        self.limit = limit
        self.logger = logger or multiprocessing.get_logger()
        self._pool = None
        self._processes = None

    def start(self):
        """Run the task pool.

        Will pre-fork all workers so they're ready to accept tasks.

        """
        self._processes = {}
        self._pool = Pool(processes=self.limit)

    def stop(self):
        """Terminate the pool."""
        self._pool.terminate()
        self._processes = {}
        self._pool = None

    def apply_async(self, target, args=None, kwargs=None, callbacks=None,
            errbacks=None, on_ack=None, meta=None):
        """Equivalent of the :func:``apply`` built-in function.

        All ``callbacks`` and ``errbacks`` should complete immediately since
        otherwise the thread which handles the result will get blocked.

        """
        args = args or []
        kwargs = kwargs or {}
        callbacks = callbacks or []
        errbacks = errbacks or []
        meta = meta or {}
        tid = gen_unique_id()

        on_return = curry(self.on_return, tid, callbacks, errbacks,
                          on_ack, meta)

        result = self._pool.apply_async(target, args, kwargs,
                                        callback=on_return)

        self._processes[tid] = [result, callbacks, errbacks, meta]

        return result

    def on_return(self, tid, callbacks, errbacks, on_ack, meta, ret_value):
        """What to do when the process returns."""

        # Acknowledge the task as being processed.
        if on_ack:
            on_ack()

        try:
            del(self._processes[tid])
        except KeyError:
            pass
        else:
            self.on_ready(callbacks, errbacks, meta, ret_value)

    def full(self):
        """Is the pool full?

        :returns: ``True`` if the maximum number of concurrent processes
            has been reached.

        """
        return len(self._processes.values()) >= self.limit

    def get_worker_pids(self):
        """Returns the process id's of all the pool workers."""
        return [process.pid for process in self._pool._pool]

    def on_ready(self, callbacks, errbacks, meta, ret_value):
        """What to do when a worker task is ready and its return value has
        been collected."""

        if isinstance(ret_value, ExceptionInfo):
            if isinstance(ret_value.exception, (
                    SystemExit, KeyboardInterrupt)):
                raise ret_value.exception
            for errback in errbacks:
                errback(ret_value, meta)
        else:
            for callback in callbacks:
                callback(ret_value, meta)
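
# Hedged usage sketch for the (celery-style) TaskPool above; `add` and the
# callback are placeholders, and the surrounding celery utilities
# (gen_unique_id, curry, ExceptionInfo) are assumed to be importable.
def add(x, y):
    return x + y

def on_done(ret_value, meta):
    print("done:", ret_value)

task_pool = TaskPool(limit=4)
task_pool.start()
result = task_pool.apply_async(add, args=(2, 3), callbacks=[on_done])
result.wait()                   # block until the worker has finished
task_pool.stop()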
Beispiel #44
0
def main(force_reanalyze=False, include_hidden=False,
         dry_run=False, gain_type='auto',
         jobs=default_job_count(),
         quiet=False, verbose=False,
         *music_directories
         ):
    """Add replaygain tags to your music files."""
    if quiet:
        logging.basicConfig(level=logging.WARN)
    elif verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    # Some pesky functions used below will catch KeyboardInterrupts
    # inappropriately, so install an alternate handler that bypasses
    # KeyboardInterrupt instead.
    def signal_handler(sig, frame):
        print "Canceled."
        os.kill(os.getpid(), signal.SIGTERM)
    original_handler = signal.signal(signal.SIGINT, signal_handler)

    track_class = RGTrack
    if dry_run:
        logging.warn('This script is running in "dry run" mode, so no files will actually be modified.')
        track_class = RGTrackDryRun
    if len(music_directories) == 0:
        logging.error("You did not specify any music directories or files. Exiting.")
        sys.exit(1)

    logging.info("Searching for music files in the following directories:\n%s", "\n".join(music_directories),)
    tracks = [ track_class(f) for f in get_all_music_files(music_directories, ignore_hidden=(not include_hidden)) ]

    # Filter out tracks for which we can't get the length
    for t in tracks[:]:
        try:
            len(t)
        except Exception:
            logging.error("Track %s appears to be invalid. Skipping.", t.filename)
            tracks.remove(t)

    if len(tracks) == 0:
        logging.error("Failed to find any tracks in the directories you specified. Exiting.")
        sys.exit(1)
    track_sets = RGTrackSet.MakeTrackSets(tracks)

    # Remove the earlier bypass of KeyboardInterrupt
    signal.signal(signal.SIGINT, original_handler)

    logging.info("Beginning analysis")
    handler = TrackSetHandler(force=force_reanalyze, gain_type=gain_type)

    # For display purposes, calculate how much granularity is required
    # to show visible progress at each update
    total_length = sum(len(ts) for ts in track_sets)
    min_step = min(len(ts) for ts in track_sets)
    places_past_decimal = max(0,int(math.ceil(-math.log10(min_step * 100.0 / total_length))))
    update_string = '%.' + str(places_past_decimal) + 'f%% done'

    import gst
    pool = None
    try:
        if jobs == 1:
            # Sequential
            handled_track_sets = imap(handler, track_sets)
        else:
            # Parallel
            pool = Pool(jobs)
            handled_track_sets = pool.imap_unordered(handler,track_sets)
        processed_length = 0
        percent_done = 0
        for ts in handled_track_sets:
            processed_length = processed_length + len(ts)
            percent_done = 100.0 * processed_length / total_length
            logging.info(update_string, percent_done)
        logging.info("Analysis complete.")
    except KeyboardInterrupt:
        if pool is not None:
            logging.debug("Terminating process pool")
            pool.terminate()
            pool = None
        raise
    finally:
        if pool is not None:
            logging.debug("Closing transcode process pool")
            pool.close()
    if dry_run:
        logging.warn('This script ran in "dry run" mode, so no files were actually modified.')
    pass
Beispiel #45
0
def main():
    num_args = 2

    args = sys.argv[1:]
    if len(args) != num_args:
        print(
            "Usage: python3 launch_clients.py <client-scenario.json> <client_keys_to_run>\n"
            "Example: python3 launch_clients.py scenario1.json clients1,clients2,clients3"
        )
        exit(1)

    client_scenario_filename = args[0]
    groups = args[1].split(",")

    with open(f"{CLI_SCENARIOS_PATH}/{client_scenario_filename}",
              'r') as cli_scenario_fp:
        cli_scenario = json.load(cli_scenario_fp)

    print(f"Launching scenario: {client_scenario_filename}")

    setup_client_node_dirs(cli_scenario, groups)

    print("Finished setting up dirs")

    ingress = get_ingress()

    print(f"Got ingress {ingress}")

    env_vars = {
        "AUTHENTICATION_URL": ingress,
        "BATTLES_URL": ingress,
        "GYM_URL": ingress,
        "LOCATION_URL": ingress,
        "MICROTRANSACTIONS_URL": ingress,
        "NOTIFICATIONS_URL": ingress,
        "STORE_URL": ingress,
        "TRADES_URL": ingress,
        "TRAINERS_URL": ingress,
        "INGRESS_URL": ingress,
        "NOVAPOKEMON": NOVAPOKEMON_DIR,
        "LOCATION_TAGS": f'{CLI_DIR}/location_tags.json',
        "DELAYS_CONFIG": f'{CLI_DIR}/delays_config.json',
        "CLIENT_DELAYS": f'{CLI_DIR}/client_delays.json',
        "CELLS_TO_REGION": f'{CLI_DIR}/cells_to_region.json',
        "REGIONS_TO_AREA": f'{CLI_DIR}/regions_to_area.json',
        "CONFIGS": f'{CLI_DIR}/configs.json',
        "LAT": f'{CLI_DIR}/lats.txt',
        "LOCATIONS": f'{CLI_DIR}/locations.json'
    }

    print(f'Created env vars!')

    pool = Pool(processes=os.cpu_count())

    print("Will launch clients...")

    async_waits = []
    for cli_job_name, cli_job in cli_scenario.items():
        if cli_job_name not in groups:
            print(f"{cli_job_name} not in list")
            continue
        t = time.localtime()
        current_time = time.strftime("%H:%M:%S", t)
        print(f'Launching {cli_job_name} at {current_time}...', flush=True)
        async_waits.append(
            pool.apply_async(launch_cli_job,
                             (cli_job_name, cli_job, env_vars)))

    for w in async_waits:
        cli_job_name = w.get()
        print(f'Finished multiclient for {cli_job_name}')

    pool.terminate()
    pool.close()
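
# The fan-out / gather pattern used above, reduced to a self-contained sketch
# (the worker function and its arguments are illustrative):
from multiprocessing import Pool

def work(name):
    return f"done {name}"

if __name__ == "__main__":
    with Pool() as pool:
        pending = [pool.apply_async(work, (n,)) for n in ("a", "b", "c")]
        for p in pending:
            print(p.get())       # .get() blocks until that task's result is ready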
Beispiel #46
0
class ProcessPoolStrategy(ParallelStrategy, _PoolRunnableStrategy,
                          _Resultable):

    _Processors_Pool: Pool = None
    _Processors_List: List[Union[ApplyResult, AsyncResult]] = None

    def __init__(self, pool_size: int):
        super().__init__(pool_size=pool_size)

    def initialization(self,
                       queue_tasks: Optional[Union[_BaseQueueTask,
                                                   _BaseList]] = None,
                       features: Optional[Union[_BaseFeatureAdapterFactory,
                                                _BaseList]] = None,
                       *args,
                       **kwargs) -> None:
        super(ProcessPoolStrategy,
              self).initialization(queue_tasks=queue_tasks,
                                   features=features,
                                   *args,
                                   **kwargs)

        # Activate multiprocessing.managers.BaseManager server
        activate_manager_server()

        # Initialize and build the Processes Pool.
        __pool_initializer: Callable = kwargs.get("pool_initializer", None)
        __pool_initargs: IterableType = kwargs.get("pool_initargs", None)
        self._Processors_Pool = Pool(processes=self.pool_size,
                                     initializer=__pool_initializer,
                                     initargs=__pool_initargs)
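
    # Note: Pool.apply() blocks until each submitted call returns, so the list
    # comprehensions in apply()/apply_with_iter() below run their calls one at a
    # time; the async_* variants use apply_async() and AsyncResult.get() instead.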

    def apply(self,
              tasks_size: int,
              function: Callable,
              args: Tuple = (),
              kwargs: Dict = {}) -> None:
        self.reset_result()
        __process_running_result = None

        try:
            __process_running_result = [
                self._Processors_Pool.apply(func=function,
                                            args=args,
                                            kwds=kwargs)
                for _ in range(tasks_size)
            ]
            __exception = None
            __process_run_successful = True
        except Exception as e:
            __exception = e
            __process_run_successful = False

        # Save Running result state and Running result value as dict
        self._result_saving(successful=__process_run_successful,
                            result=__process_running_result,
                            exception=__exception)

    def async_apply(self,
                    tasks_size: int,
                    function: Callable,
                    args: Tuple = (),
                    kwargs: Dict = {},
                    callback: Callable = None,
                    error_callback: Callable = None) -> None:
        self.reset_result()
        self._Processors_List = [
            self._Processors_Pool.apply_async(func=function,
                                              args=args,
                                              kwds=kwargs,
                                              callback=callback,
                                              error_callback=error_callback)
            for _ in range(tasks_size)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_process_run_successful,
                                result=_process_running_result,
                                exception=_exception)

    def apply_with_iter(self,
                        functions_iter: List[Callable],
                        args_iter: List[Tuple] = None,
                        kwargs_iter: List[Dict] = None) -> None:
        self.reset_result()
        __process_running_result = None

        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        try:
            __process_running_result = [
                self._Processors_Pool.apply(func=_func,
                                            args=_args,
                                            kwds=_kwargs) for _func, _args,
                _kwargs in zip(functions_iter, args_iter, kwargs_iter)
            ]
            __exception = None
            __process_run_successful = True
        except Exception as e:
            __exception = e
            __process_run_successful = False

        # Save Running result state and Running result value as dict
        self._result_saving(successful=__process_run_successful,
                            result=__process_running_result,
                            exception=__exception)

    def async_apply_with_iter(
            self,
            functions_iter: List[Callable],
            args_iter: List[Tuple] = None,
            kwargs_iter: List[Dict] = None,
            callback_iter: List[Callable] = None,
            error_callback_iter: List[Callable] = None) -> None:
        self.reset_result()

        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        if callback_iter is None:
            callback_iter = [None for _ in functions_iter]

        if error_callback_iter is None:
            error_callback_iter = [None for _ in functions_iter]

        self._Processors_List = [
            self._Processors_Pool.apply_async(func=_func,
                                              args=_args,
                                              kwds=_kwargs,
                                              callback=_callback,
                                              error_callback=_error_callback)
            for _func, _args, _kwargs, _callback, _error_callback in zip(
                functions_iter, args_iter, kwargs_iter, callback_iter,
                error_callback_iter)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_process_run_successful,
                                result=_process_running_result,
                                exception=_exception)

    def map(self,
            function: Callable,
            args_iter: IterableType = (),
            chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.map(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def async_map(self,
                  function: Callable,
                  args_iter: IterableType = (),
                  chunksize: int = None,
                  callback: Callable = None,
                  error_callback: Callable = None) -> None:
        self.reset_result()

        _process_running_result = None
        _exception = None

        _map_result = self._Processors_Pool.map_async(
            func=function,
            iterable=args_iter,
            chunksize=chunksize,
            callback=callback,
            error_callback=error_callback)

        try:
            _process_running_result = _map_result.get()
            _process_run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def map_by_args(self,
                    function: Callable,
                    args_iter: IterableType[IterableType] = (),
                    chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.starmap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def async_map_by_args(self,
                          function: Callable,
                          args_iter: IterableType[IterableType] = (),
                          chunksize: int = None,
                          callback: Callable = None,
                          error_callback: Callable = None) -> None:
        self.reset_result()
        _process_running_result = None
        _exception = None

        _map_result = self._Processors_Pool.starmap_async(
            func=function,
            iterable=args_iter,
            chunksize=chunksize,
            callback=callback,
            error_callback=error_callback)

        try:
            _process_running_result = _map_result.get()
            _process_run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def imap(self,
             function: Callable,
             args_iter: IterableType = (),
             chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = [
                result for result in imap_running_result
            ]
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def imap_unordered(self,
                       function: Callable,
                       args_iter: IterableType = (),
                       chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap_unordered(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = [
                result for result in imap_running_result
            ]
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful,
                                result=__result,
                                exception=_exception)

    def _result_saving(self, successful: bool, result: List,
                       exception: Exception) -> None:
        _process_result = {
            "successful": successful,
            "result": result,
            "exception": exception
        }
        self._Processors_Running_Result.append(_process_result)

    def close(self) -> None:
        self._Processors_Pool.close()
        self._Processors_Pool.join()

    def terminal(self) -> None:
        self._Processors_Pool.terminate()

    def get_result(self) -> List[_ProcessPoolResult]:
        return self.result()

    def _saving_process(self) -> List[_ProcessPoolResult]:
        _pool_results = []
        for __result in self._Processors_Running_Result:
            _pool_result = _ProcessPoolResult()
            _pool_result.is_successful = __result["successful"]
            _pool_result.data = __result["result"]
            _pool_results.append(_pool_result)
        return _pool_results
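
All of the methods above follow the same recipe: drive a multiprocessing.Pool, then record each outcome through _result_saving as a dict with successful/result/exception keys. A minimal free-standing sketch of that recipe against a plain Pool (the helper names here are illustrative only, not part of the wrapper's API):

from multiprocessing import Pool

def task(x):
    if x == 3:
        raise RuntimeError("boom")
    return x * x

if __name__ == "__main__":
    records = []
    with Pool(processes=2) as pool:
        async_results = [pool.apply_async(task, (i,)) for i in range(5)]
        for ar in async_results:
            try:
                value = ar.get()
                # successful() is only meaningful once the result is ready
                records.append({"successful": ar.successful(),
                                "result": value,
                                "exception": None})
            except Exception as exc:
                records.append({"successful": False,
                                "result": None,
                                "exception": exc})
    print(records)
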
Beispiel #47
0
def main(lib_name,
         in_file=sys.stdin,
         out_file=sys.stdout,
         cached_only=False,
         just=None,
         num_workers=None,
         read_cache=True,
         write_cache=True):

    def cb_ready0(result, progress):
        """Progress callback: report each result as it arrives."""
        name, url, output, cached = result

        if output:
            textout = "%s" % (name)
        elif url != None:
            textout =  "Failed to parse output for %s in %s.\n" % (name, url)
        else:
            textout = None if cached_only else "Could not locate %s documentation.\n" % (name)
        
        progress[0] += 1
        percent = float(progress[0]) / float(progress[1]) * 100.0

        if textout == None:
            return

        if not out_file.isatty():            
            max_cols = 75
            sys.stderr.write("\r%s" % (' ' * max_cols))
            
            #progbar  = "\r[%.2d%%] " % (int(percent))
            progbar = "\r[%d/%d] " % (progress[0], progress[1])
            max_cols = max_cols - len(progbar)
            if len(textout) > max_cols:
                if textout.endswith('\n'):
                    textout = textout[:max_cols-4] + '...\n'
                else:
                    textout = textout[:max_cols-3] + '...'
        else:
            #progbar = "[%.2d%%] " % int(percent)
            progbar = "[%d/%d] " % (progress[0], progress[1])
            textout =  textout + '\n' if not textout.endswith('\n') else textout

        sys.stderr.write("%s%s" % (progbar, textout))
        sys.stderr.flush()

    def write_csv(f):
        """Process and write the csv data into the file f."""
        progress = [0, len(funcs)]
        jobs = [pool.apply_async(msdn_get_doc, (name, msdn_cache, cached_only),
                                 callback=lambda r: cb_ready0(r, progress))
                for name in funcs]
        f.write("Name,Prototype,Bad Return,Errors,SDK Client,SDK Server,SDK Phone,URL\n")
        for job in jobs:
            name, url, output, cached = job.get()
            html      = output and output.html or None
            prototype = output and output.prototype or ""
            errors    = output and " ".join(output.errors) or ""
            bad_ret   = output and output.bad_ret or ""
            sdkclient = (output and output.sdks[0]) and output.sdks[0] or ""
            sdkserver = (output and output.sdks[1]) and output.sdks[1] or ""
            sdkphone  = (output and output.sdks[2]) and output.sdks[2] or ""
            f.write('%s,"%s","%s","%s","%s","%s","%s","%s"\n' % (name, prototype, bad_ret, errors, sdkclient, sdkserver, sdkphone, url))
            #f.write('%s,"%s","%s","%s","%s","%s","%s","%s"\n' % (name, prototype, url, sdkclient, sdkserver, sdkphone, bad_ret, errors))
            if html != None and not cached and write_cache:
                msdn_cache.write(lib_name, name, html)

    if num_workers == None:
        # IO bound operations, so 10 times the cpu count isn't a problem.
        num_workers = min(40, 10 * multiprocessing.cpu_count())
    
    pool        = Pool(processes=num_workers, initializer=_init_worker)
    msdn_cache  = read_cache and MsdnCache.open(MSDN_CACHE_PATH) or MsdnCache()
    funcs       = find_wide_functions(read_def(in_file))

    if just != None:
        funcs = [fc for fc in funcs if remove_wide(fc) in just]

    try:
        write_csv(out_file)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()

    # because of our progress bar
    sys.stderr.write("\n")
    try:
        print('press CTRL-c to stop generating samples')
        it = pool.imap_unordered(f, cycle(sim_list))
        
        while 1:
            sim, result = it.next(timeout=SIMULATION_TIMEOUT)
            completed.append(sim)
            sys.stdout.write('.')
            for p, wins in result.items():
                container[p].append( (sim, wins) )
            
    except KeyboardInterrupt:
        pool.close()
        print('stopping all simulations...')
    finally:
        pool.terminate()
        pool.join()

    c = dict(Counter(completed).most_common())
    for idx, sim in enumerate([s.__name__ for s in sim_list]):
        print('Test: {0}, Iterations {1}, Heuristic: {2}'.format(idx, c[sim], sim))
        
    for pair in pairs:
        print('')
        print(pair)
        f, p = f_oneway(*[[i[1] for i in container[pair] if i[0] == sim] for sim in [s.__name__ for s in sim_list]])
        print('F-stat: {0} at sig {1}: {2}'.format(str(round(f, 3)).ljust(7), 
                                                   str(round(p, 3)).ljust(7),
                                                   ['NULL','REJECT'][p <= .05]))
    
        if p <= .05:
Beispiel #49
0

import os, random
from time import time, sleep
from multiprocessing import Pool


def run(name):
    print("Child process %s started, PID: %d" % (name, os.getpid()))
    start = time()
    sleep(random.choice([1, 2, 3, 4]))
    end = time()
    print("Child process %s finished, PID: %d, elapsed %.2f seconds" % (name, os.getpid(), end - start))


if __name__ == "__main__":
    print("父进程开始")
    # 创建多个进程,表示可以同时执行的进程数量。默认大小是CPU的核心数
    p = Pool(4)
    for i in range(10):
        # 创建进程,放入进程池统一管理
        p.apply_async(run, args=(i, ))
    # 如果我们用的是进程池,在调用join()之前必须要先close(),
    # 并且在close()之后不能再继续往进程池添加新的进程
    p.close()
    # 进程池对象调用join,会等待进程池中所有的子进程结束完毕再去结束父进程
    p.join()
    print("父进程结束。")
    p.terminate()

#
# close(): when using a pool, close() must be called before join(), and no
#   new tasks may be submitted to the pool after close()
# join(): blocks until every child process in the pool has finished before
#   the parent process continues
# terminate(): stops the pool immediately, whether or not its tasks have finished
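
As a small illustration of the close()/join()/terminate() rules described above (a minimal sketch using only the standard library): once close() has been called, submitting new work raises ValueError, while work that was already queued still runs to completion before join() returns.

from multiprocessing import Pool

def square(x):
    return x * x

if __name__ == "__main__":
    pool = Pool(2)
    results = [pool.apply_async(square, (i,)) for i in range(4)]
    pool.close()                 # no new tasks may be submitted from here on
    try:
        pool.apply_async(square, (99,))
    except ValueError as exc:
        print("submission after close() fails:", exc)
    pool.join()                  # blocks until the four queued tasks finish
    print([r.get() for r in results])
    pool.terminate()             # harmless here: everything has already finished
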
Beispiel #50
0
class DepthFrameCompressor(object):
    """
    Asynchronous compression pipeline for depth frames.

    *kinect* is a :py:class:`streamkinect2.mock.MockKinect`-like object. Depth
    frames emitted via :py:meth:`on_depth_frame` are compressed asynchronously;
    frames are dropped if the compressor becomes overloaded.

    If *io_loop* is provided, it specifies the
    :py:class:`tornado.ioloop.IOLoop` which is used to co-ordinate the worker
    process. If not provided, the global instance is used.

    .. py:attribute:: kinect

        Kinect object associated with this compressor.
    """

    on_compressed_frame = Signal()
    """Signal emitted when a new compressed frame is available. Receivers take
    a single keyword argument, *compressed_frame*, which is a Python
    buffer-like object containing the compressed frame data. The signal is
    emitted on the IOLoop thread."""

    # The maximum number of frames we can be waiting for before we start
    # dropping them.
    _MAX_IN_FLIGHT = cpu_count() + 1

    def __init__(self, kinect, io_loop=None):
        # Public attributes
        self.kinect = kinect

        # Private attributes
        self._io_loop = io_loop or tornado.ioloop.IOLoop.instance()
        self._pool = Pool() # worker process pool
        self._n_in_flight = 0 # How many frames are we waiting for?
        self._n_dropped = 0

        # Wire ourselves up for depth frame events
        kinect.on_depth_frame.connect(self._on_depth_frame, sender=kinect)

    def __del__(self):
        # As a courtesy, terminate the worker pool to avoid having a sea of
        # dangling processes.
        self._pool.terminate()

    def _on_compressed_frame(self, compressed_frame):
        # Record arrival of frame
        self._n_in_flight -= 1

        # Send signal
        try:
            self._io_loop.add_callback(
                self.on_compressed_frame.send,
                self, compressed_frame=compressed_frame
            )
        except Exception as e:
            # HACK: Since multiprocessing *might* call this handler after the
            # io loop has shut down (which will raise an Exception) and because
            # there's no documented way to determine if the io loop is still
            # alive ahead of time, we swallow exceptions here. This should
            # happen rarely when one is rapidly starting and stopping IOLoops
            # (such as in the test-suite!) so log it as a warning.
            log.warn('DepthFrameCompressor swallowed {0} exception'.format(e))

    def _on_depth_frame(self, kinect, depth_frame):
        # If we aren't waiting on too many frames, submit
        if self._n_in_flight < DepthFrameCompressor._MAX_IN_FLIGHT:
            self._pool.apply_async(_compress_depth_frame,
                    args=(depth_frame,), callback=self._on_compressed_frame)
            self._n_in_flight += 1
        else:
            # Only log every 10 dropped frames to avoid being too spammy
            self._n_dropped += 1
            if self._n_dropped % 10 == 0:
                log.warn('Dropped {0} depth frames'.format(self._n_dropped))
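
The frame-drop logic above boils down to capping the number of outstanding apply_async calls and decrementing the counter in the completion callback. Below is a stripped-down sketch of that same bounded in-flight pattern with a plain Pool; the compress() function and the list payloads are hypothetical stand-ins, not the streamkinect2 API.

from multiprocessing import Pool, cpu_count

MAX_IN_FLIGHT = cpu_count() + 1

def compress(frame):
    # hypothetical stand-in for the real compression routine
    return bytes(frame)

class ThrottledSubmitter(object):
    def __init__(self):
        self._pool = Pool()
        self._in_flight = 0
        self._dropped = 0

    def _done(self, compressed):
        # runs on the pool's result-handler thread in the parent process
        self._in_flight -= 1
        print("got %d compressed bytes" % len(compressed))

    def submit(self, frame):
        if self._in_flight < MAX_IN_FLIGHT:
            self._in_flight += 1
            self._pool.apply_async(compress, (frame,), callback=self._done)
        else:
            self._dropped += 1   # overloaded: drop the frame

if __name__ == "__main__":
    submitter = ThrottledSubmitter()
    for i in range(10):
        submitter.submit([i] * 8)
    submitter._pool.close()
    submitter._pool.join()
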
class ProcessExecutor(object):
    """
    Create promises which will deliver in a separate process.
    """

    def __init__(self, processes=None):
        self._processes = processes
        self._pool = None


    def __enter__(self):
        return self


    def __exit__(self, exc_type, _exc_val, _exc_tb):
        """
        Using the managed interface forces blocking delivery at the end of
        the managed segment.
        """

        self.deliver()
        return (exc_type is None)


    def _promise(self):
        """
        override to use a different promise mechanism
        """

        return promise(blocking=True)


    def _get_pool(self):
        """
        override to provide a different pool implementation
        """

        if not self._pool:
            self._pool = Pool(processes=self._processes)
        return self._pool


    def future(self, work, *args, **kwds):
        """
        Promise to deliver on the results of work in the future.

        Parameters
        ----------
        work : `callable`
          This is the work which will be performed to deliver on the
          future.
        *args : `optional positional parameters`
          arguments to the `work` function
        **kwds : `optional named parameters`
          keyword arguments to the `work` function

        Returns
        -------
        value : `promise`
          a promise acting as a placeholder for the result of
          evaluating `work(*args, **kwds)`. Note that calling `deliver`
          on this promise will potentially block until the underlying
          result is available.
        """

        promised, setter, seterr = self._promise()

        def callback(value):
            # value is collected as the result of the _perform_work
            # function at the top of this module
            success, result = value
            if success:
                setter(result)
            else:
                seterr(*result)

        # queue up the work in our pool
        pool = self._get_pool()
        pool.apply_async(_perform_work, [work, args], kwds, callback)

        return promised


    def terminate(self):
        """
        Breaks all the remaining undelivered promises, halts execution of
        any parallel work being performed.

        Any promise which had not managed to be delivered will never
        be delivered after calling `terminate`. Attempting to call
        `deliver` on them will result in a deadlock.
        """

        # TODO: is there a way for us to cause all undelivered
        # promises to raise an exception of some sort when this
        # happens? That would be better than deadlocking while waiting
        # for delivery.

        if self._pool is not None:
            self._pool.terminate()
            self._pool = None


    def deliver(self):
        """
        Deliver on all underlying promises. Blocks until complete.
        """

        if self._pool is not None:
            self._pool.close()
            self._pool.join()
            self._pool = None
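
The worker-side helper `_perform_work` is referenced above but defined elsewhere in the module. Judging from the callback, it must return a `(success, payload)` pair, where the payload is either the work's return value or the arguments later handed to `seterr`. A plausible sketch under that assumption (the traceback is dropped because traceback objects do not pickle across processes), plus a hypothetical managed-interface usage shown as comments:

import sys

def _perform_work(work, args, **kwds):
    """Run work(*args, **kwds) in the worker and report success or failure."""
    try:
        return (True, work(*args, **kwds))
    except Exception:
        exc_type, exc_value = sys.exc_info()[:2]
        return (False, (exc_type, exc_value, None))

# hypothetical usage of the managed interface; leaving the `with` block
# blocks until every promised result has been delivered
#
# with ProcessExecutor(processes=4) as executor:
#     promised = executor.future(pow, 2, 10)
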
Beispiel #52
0
def raster2pyramid(
    input_file,
    output_dir,
    options
    ):
    """
    Creates a tile pyramid out of an input raster dataset.
    """
    pyramid_type = options["pyramid_type"]
    scale_method = options["scale_method"]
    output_format = options["output_format"]
    resampling = options["resampling"]
    zoom = options["zoom"]
    bounds = options["bounds"]
    overwrite = options["overwrite"]

    # Prepare process parameters
    minzoom, maxzoom = _get_zoom(zoom, input_file, pyramid_type)
    process_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "tilify.py"
    )

    with rasterio.open(input_file, "r") as input_raster:
        output_bands = input_raster.count
        input_dtype = input_raster.dtypes[0]
        output_dtype = input_raster.dtypes[0]
        nodataval = input_raster.nodatavals[0]
        if not nodataval:
            nodataval = 0
        if output_format == "PNG":
            if output_bands > 3:
                output_bands = 3
                output_dtype = 'uint8'
        scales_minmax = ()
        if scale_method == "dtype_scale":
            for index in range(1, output_bands+1):
                scales_minmax += (DTYPE_RANGES[input_dtype], )
        elif scale_method == "minmax_scale":
            for index in range(1, output_bands+1):
                band = input_raster.read(index)
                scales_minmax += ((band.min(), band.max()), )
        elif scale_method == "crop":
            for index in range(1, output_bands+1):
                scales_minmax += ((0, 255), )
        if input_dtype == "uint8":
            scale_method = None
            scales_minmax = ()
            for index in range(1, output_bands+1):
                scales_minmax += ((None, None), )

    # Create configuration
    config = {}
    config.update(
        process_file=process_file,
        output={
            "path": output_dir,
            "format": output_format,
            "type": pyramid_type,
            "bands": output_bands,
            "dtype": output_dtype
            },
        scale_method=scale_method,
        scales_minmax=scales_minmax,
        input_files={"raster": input_file},
        config_dir=os.getcwd(),
        process_minzoom=minzoom,
        process_maxzoom=maxzoom,
        nodataval=nodataval,
        resampling=resampling,
        bounds=bounds,
        pixelbuffer=5,
        baselevel={"zoom": maxzoom, "resampling": resampling}
    )

    LOGGER.info("preparing process ...")

    try:
        mapchete = Mapchete(
            MapcheteConfig(
                config,
                zoom=zoom,
                bounds=bounds
            )
        )
    except PyCompileError as error:
        print(error)
        return
    except:
        raise

    # Prepare output directory and logging
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logging.config.dictConfig(get_log_config(mapchete))

    for zoom in reversed(range(minzoom, maxzoom+1)):
        # Determine work tiles and run
        work_tiles = mapchete.get_work_tiles(zoom)
        func = partial(_worker,
            mapchete=mapchete,
            overwrite=overwrite
        )
        pool = Pool()
        try:
            pool.map_async(func, work_tiles)
            pool.close()
        except KeyboardInterrupt:
            LOGGER.info(
                "Caught KeyboardInterrupt, terminating workers"
                )
            pool.terminate()
            break
        except:
            raise
        finally:
            pool.close()
            pool.join()
Beispiel #53
0
    try:
        for c in count():  # Infinite range
            # Create a new pool each batch and run 'reps' timesteps in each world
            pool = Pool(cores_used)
            for res in tqdm(
                    pool.imap_unordered(drive_wrapper, enumerate(worlds)),
                    position=0,
                    desc=f"Batch {c}",
                    total=n_worlds,
            ):
                pass
            pool.close()
            pool.join()

            # Back up data and map files
            for i, conf in enumerate(configs):
                call([
                    "cp", conf["output"]["data"],
                    "{}/{}.d".format(local_backup, i)
                ])
                call([
                    "cp", conf["output"]["map"],
                    "{}/{}.m".format(local_backup, i)
                ])

            if flag.flag:
                break

    except KeyboardInterrupt:
        pool.terminate()  # Kill all processes
Beispiel #54
0
def pyorbit_emcee(config_in, input_datasets=None, return_output=None):

    try:
        import emcee
    except:
        print("ERROR: emcee not installed, this will not work")
        quit()

    os.environ["OMP_NUM_THREADS"] = "1"

    optimize_dir_output = './' + config_in['output'] + '/optimize/'
    pyde_dir_output = './' + config_in['output'] + '/pyde/'
    emcee_dir_output = './' + config_in['output'] + '/emcee/'

    reloaded_optimize = False
    reloaded_pyde = False
    reloaded_emcee_multirun = False
    reloaded_emcee = False

    try:
        mc, population, starting_point, theta_dict = pyde_load_from_cpickle(
            pyde_dir_output, prefix='')
        reloaded_pyde = True
    except:
        pass

    try:
        mc, starting_point, population, _, _, sampler_chain, _, _, theta_dict, _ = \
            emcee_load_from_cpickle(emcee_dir_output, prefix='MR')
        reloaded_emcee_multirun = True
    except:
        pass

    try:
        mc, starting_point, population, _, _, sampler_chain, sampler_lnprobability, _, theta_dict, _ = \
            emcee_load_from_cpickle(emcee_dir_output)
        reloaded_emcee = True
    except:
        pass

    try:
        starting_point, previous_boundaries, theta_dict = starting_point_load_from_cpickle(
            optimize_dir_output)
        reloaded_optimize = True
    except:
        pass

    print()
    print('reloaded_optimize: ', reloaded_optimize)
    print('reloaded_pyde: ', reloaded_pyde)
    print('reloaded_emcee_multirun: ', reloaded_emcee_multirun)
    print('reloaded_emcee: ', reloaded_emcee)

    if reloaded_emcee:
        """ There's no need to do anything"""
        flatchain = emcee_flatchain(sampler_chain,
                                    mc.emcee_parameters['nburn'],
                                    mc.emcee_parameters['thin'])
        mc.model_setup()
        mc.initialize_logchi2()
        results_analysis.print_integrated_ACF(sampler_chain, theta_dict,
                                              mc.emcee_parameters['thin'])
        results_analysis.results_resumen(mc, flatchain)

        if return_output:
            return mc, sampler_chain, sampler_lnprobability
        else:
            return

    reloaded_mc = reloaded_pyde or reloaded_emcee_multirun
    if reloaded_mc:
        previous_boundaries = mc.bounds

    mc = ModelContainerEmcee()

    pars_input(config_in, mc, input_datasets)

    if mc.pyde_parameters['shutdown_jitter'] or mc.emcee_parameters[
            'shutdown_jitter']:
        for dataset_name, dataset in mc.dataset_dict.items():
            dataset.shutdown_jitter()

    # keep track of which version has been used to perform emcee computations
    mc.emcee_parameters['version'] = emcee.__version__[0]

    mc.model_setup()
    mc.create_variables_bounds()
    mc.initialize_logchi2()

    results_analysis.results_resumen(mc, None, skip_theta=True)

    mc.pyde_dir_output = pyde_dir_output
    mc.emcee_dir_output = emcee_dir_output

    mc.emcee_parameters['nwalkers'] = mc.ndim * \
        mc.emcee_parameters['npop_mult']
    if mc.emcee_parameters['nwalkers'] % 2 == 1:
        mc.emcee_parameters['nwalkers'] += 1

    if not os.path.exists(mc.emcee_dir_output):
        os.makedirs(mc.emcee_dir_output)

    print()
    print('emcee version: ', emcee.__version__)
    if mc.emcee_parameters['version'] == '2':
        print('WARNING: upgrading to version 3 is strongly advised')
    print()
    print('Include priors: ', mc.include_priors)
    print()
    print('Reference Time Tref: ', mc.Tref)
    print()
    print('Dimensions = ', mc.ndim)
    print('Nwalkers = ', mc.emcee_parameters['nwalkers'])

    if not getattr(mc, 'use_threading_pool', False):
        mc.use_threading_pool = False

    print()
    print('Using threading pool:', mc.use_threading_pool)
    print()
    print('*************************************************************')
    print()

    if reloaded_mc:

        theta_dict_legacy = theta_dict.copy()
        population_legacy = population.copy()

        theta_dict = results_analysis.get_theta_dictionary(mc)
        population = np.zeros([mc.emcee_parameters['nwalkers'], mc.ndim],
                              dtype=np.double)

        for theta_name, theta_i in theta_dict.items():
            population[:, theta_i] = population_legacy[:, theta_dict_legacy[
                theta_name]]
            mc.bounds[theta_i] = previous_boundaries[
                theta_dict_legacy[theta_name]]

        starting_point = np.median(population, axis=0)
        # print(starting_point)
        # print(population)

        print('Using previous population as starting point. ')
        sys.stdout.flush()
        print()

    else:

        if mc.starting_point_flag or reloaded_optimize:

            if reloaded_optimize:
                print(
                    'Using the output from a previous optimize run as starting point'
                )
                theta_dict_legacy = theta_dict.copy()
                starting_point_legacy = starting_point.copy()
                theta_dict = results_analysis.get_theta_dictionary(mc)
                for theta_name, theta_i in theta_dict.items():
                    starting_point[theta_i] = starting_point_legacy[
                        theta_dict_legacy[theta_name]]
            else:
                print('Using user-defined starting point from YAML file')
                mc.create_starting_point()
                starting_point = mc.starting_point

            population = np.zeros([mc.emcee_parameters['nwalkers'], mc.ndim],
                                  dtype=np.double)
            for ii in range(0, mc.emcee_parameters['nwalkers']):
                population[ii, :] = np.random.normal(starting_point, 0.0000001)

            print(
                'to create a synthetic population extremely close to the starting values.'
            )
            sys.stdout.flush()

        else:

            try:
                from pyde.de import DiffEvol
            except ImportError:
                print(
                    'ERROR! PyDE is not installed, run first with optimize instead of emcee'
                )
                quit()

            if not os.path.exists(mc.pyde_dir_output):
                os.makedirs(mc.pyde_dir_output)

            print('PyDE running')
            sys.stdout.flush()

            de = DiffEvol(mc,
                          mc.bounds,
                          mc.emcee_parameters['nwalkers'],
                          maximize=True)
            de.optimize(int(mc.pyde_parameters['ngen']))

            population = de.population
            starting_point = np.median(population, axis=0)

            theta_dict = results_analysis.get_theta_dictionary(mc)
            """ bounds redefinition and fix for PyDE anomalous results """
            if mc.recenter_bounds_flag:
                pyde_save_to_pickle(mc,
                                    population,
                                    starting_point,
                                    theta_dict,
                                    prefix='orig')

                mc.recenter_bounds(starting_point)
                population = mc.fix_population(starting_point, population)
                starting_point = np.median(population, axis=0)

                print('Boundaries redefined after PyDE output')

            pyde_save_to_pickle(mc, population, starting_point, theta_dict)

            print('PyDE completed')
            sys.stdout.flush()

    results_analysis.results_resumen(mc,
                                     starting_point,
                                     compute_lnprob=True,
                                     is_starting_point=True)

    if mc.use_threading_pool:
        if mc.emcee_parameters['version'] == '2':
            threads_pool = emcee.interruptible_pool.InterruptiblePool(
                mc.emcee_parameters['nwalkers'])
        else:
            from multiprocessing.pool import Pool as InterruptiblePool
            threads_pool = InterruptiblePool(mc.emcee_parameters['nwalkers'])

    if mc.emcee_parameters['multirun'] and not reloaded_emcee_multirun:

        for ii in range(0, mc.emcee_parameters['multirun_iter']):
            print('emcee exploratory run #', ii, ' of ',
                  mc.emcee_parameters['multirun_iter'])
            # sampler = emcee.EnsembleSampler(mc.emcee_parameters['nwalkers'], mc.ndim, mc,
            #                                 threads=mc.emcee_parameters['nwalkers'])
            if mc.use_threading_pool:
                sampler = emcee.EnsembleSampler(
                    mc.emcee_parameters['nwalkers'],
                    mc.ndim,
                    mc,
                    pool=threads_pool)
            else:
                sampler = emcee.EnsembleSampler(
                    mc.emcee_parameters['nwalkers'], mc.ndim, mc)

            population, prob, state = sampler.run_mcmc(
                population, mc.emcee_parameters['multirun'])
            flatchain = emcee_flatchain(sampler.chain,
                                        mc.emcee_parameters['nburn'],
                                        mc.emcee_parameters['thin'])
            results_analysis.results_resumen(mc, flatchain)

            max_ind = np.argmax(prob)
            starting_point = population[max_ind, :]

            population = np.asarray([
                starting_point + 1e-4 * np.random.randn(mc.ndim)
                for i in range(mc.emcee_parameters['nwalkers'])
            ])
            sampler.reset()

            theta_dict = results_analysis.get_theta_dictionary(mc)
            emcee_save_to_cpickle(mc,
                                  starting_point,
                                  population,
                                  prob,
                                  state,
                                  sampler,
                                  theta_dict,
                                  prefix='MR_' + repr(ii))

        emcee_save_to_cpickle(mc,
                              starting_point,
                              population,
                              prob,
                              state,
                              sampler,
                              theta_dict,
                              prefix='MR')

        flatchain = emcee_flatchain(sampler.chain,
                                    mc.emcee_parameters['nburn'],
                                    mc.emcee_parameters['thin'])
        results_analysis.print_integrated_ACF(sampler.chain, theta_dict,
                                              mc.emcee_parameters['thin'])
        results_analysis.results_resumen(mc, flatchain)

        print('emcee exploratory runs completed')
        sys.stdout.flush()

    print()
    print('Running emcee')
    state = None

    if mc.use_threading_pool:
        sampler = emcee.EnsembleSampler(mc.emcee_parameters['nwalkers'],
                                        mc.ndim,
                                        mc,
                                        pool=threads_pool)
    else:
        sampler = emcee.EnsembleSampler(mc.emcee_parameters['nwalkers'],
                                        mc.ndim, mc)

    if mc.emcee_parameters['nsave'] > 0:
        print()
        print(' Saving temporary steps')
        niter = int(mc.emcee_parameters['nsteps'] /
                    mc.emcee_parameters['nsave'])
        sampled = 0
        for i in range(0, niter):
            population, prob, state = sampler.run_mcmc(
                population,
                mc.emcee_parameters['nsave'],
                thin=mc.emcee_parameters['thin'],
                rstate0=state)
            sampled += mc.emcee_parameters['nsave']
            theta_dict = results_analysis.get_theta_dictionary(mc)
            emcee_save_to_cpickle(mc,
                                  starting_point,
                                  population,
                                  prob,
                                  state,
                                  sampler,
                                  theta_dict,
                                  samples=sampled)

            flatchain = emcee_flatchain(sampler.chain,
                                        mc.emcee_parameters['nburn'],
                                        mc.emcee_parameters['thin'])
            results_analysis.print_integrated_ACF(sampler.chain, theta_dict,
                                                  mc.emcee_parameters['thin'])
            results_analysis.results_resumen(mc, flatchain)

            print()
            print(sampled, '  steps completed, median lnprob: ',
                  np.median(prob))

            sys.stdout.flush()

    else:
        population, prob, state = sampler.run_mcmc(
            population,
            mc.emcee_parameters['nsteps'],
            thin=mc.emcee_parameters['thin'])

        theta_dict = results_analysis.get_theta_dictionary(mc)
        emcee_save_to_cpickle(mc, starting_point, population, prob, state,
                              sampler, theta_dict)

        flatchain = emcee_flatchain(sampler.chain,
                                    mc.emcee_parameters['nburn'],
                                    mc.emcee_parameters['thin'])
        results_analysis.print_integrated_ACF(sampler.chain, theta_dict,
                                              mc.emcee_parameters['thin'])
        results_analysis.results_resumen(mc, flatchain)
    print()
    print('emcee completed')

    if mc.use_threading_pool:
        # close the pool of threads
        threads_pool.close()
        threads_pool.terminate()
        threads_pool.join()
    """ A dummy file is created to let the cpulimit script to proceed with the next step"""
    emcee_create_dummy_file(mc)

    if return_output:
        return mc, sampler.chain, sampler.lnprobability
Beispiel #55
0
def main() -> None:
    """Run BarcSeek"""
    parser = arguments.set_args()  # type: argparse.ArgumentParser
    if not sys.argv[1:]:
        sys.exit(parser.print_help())
    args = vars(parser.parse_args())  # type: Dict[str, Any]
    #   Make an output directory
    # if os.path.exists(args['outdirectory']):
    #     args['outdirectory'] = args['outdirectory'] + time.strftime('_%Y-%m-%d_%H:%M')
    os.makedirs(args['outdirectory'], exist_ok=True)
    #   Make a prefix for project-level output files
    output_prefix = os.path.join(args['outdirectory'],
                                 sys.argv[0])  # type: str
    #   Setup the logger
    #   Formatting values
    log_format = '%(asctime)s %(levelname)s:\t%(message)s'  # type: str
    date_format = '%Y-%m-%d %H:%M:%S'  # type: str
    #   Formatters
    stripped_formatter = utilities.StrippedFormatter(
        fmt=log_format, datefmt=date_format)  # utilities.StrippedFormatter
    colored_formater = utilities.ColoredFormatter(
        fmt=log_format,
        datefmt=date_format)  # type: utilities.ColoredFormatter
    #   Open /dev/null (or the platform equivalent) as a sink for the basic stream handler
    devnull = open(os.devnull, 'w')
    #   Configure the logger
    verbosity = _set_verbosity(level=args['verbosity'])  # type: int
    logging.basicConfig(
        stream=devnull,
        level=verbosity,
    )
    #   If we're being verbose, capture other warnings (mainly matplotlib and numpy)
    #   Otherwise, ignore them
    if verbosity == logging.DEBUG:
        logging.captureWarnings(True)
    else:
        warnings.filterwarnings('ignore')
    #   Setup a FileHandler for the log file
    #   Use a StrippedFormatter to remove extra ANSI color codes
    logname = output_prefix + '.log'
    logfile = logging.FileHandler(filename=logname,
                                  mode='w')  # type: logging.FileHandler
    logfile.setFormatter(stripped_formatter)
    logging.getLogger().addHandler(logfile)
    #   Setup the console handler
    #   Use a ColoredFormatter because colors are cool
    console = logging.StreamHandler()  # type: logging.StreamHandler
    console.setFormatter(colored_formater)
    logging.getLogger().addHandler(console)
    #   Begin the program
    logging.info("Welcome to %s!", os.path.basename(sys.argv[0]))
    program_start = time.time()  # type: float
    #   Read in the barcodes
    barcodes_dict = barcodes.read_barcodes(
        barcodes_file=args['barcodes'])  # type: Dict[str, str]
    if barcodes.barcode_check(barcode_dict=barcodes_dict):
        logging.error("Cannot have ambiguous or duplicate barcodes")
        raise ValueError("Cannot have ambiguous or duplicate barcodes")
    #   Read in the sample sheet and match barcode sequences to each sample
    sample_sheet = utilities.load_sample_sheet(
        sheet_file=args['sample_sheet']
    )  # type: Dict[str, Tuple[str, Optional[str]]]
    sample_barcodes = utilities.match_barcodes(
        sample_sheet=sample_sheet, barcodes_dictionary=barcodes_dict
    )  # type: Dict[str, Tuple[str, Optional[str]]]
    print(sample_barcodes)
    raise SystemExit
    #   Create the multiprocessing pool
    #   Tell the pool to ignore SIGINT (^C)
    #   by turning INTERRUPT signals into IGNORED signals
    #   (a standalone sketch of this pattern follows this example)
    sigint_handler = signal.signal(signal.SIGINT,
                                   signal.SIG_IGN)  # type: function
    #   Setup our multiprocessing pool
    #   Allow the user to specify the number of jobs to run at once
    #   If not specified, let multiprocessing figure it out
    if args['num_cores']:
        pool = Pool(processes=args['num_cores'])
    else:
        pool = Pool()
    #   Re-enable the capturing of SIGINT, catch with KeyboardInterrupt
    #   or ExitPool, depending on how the exit was initiated
    #   Note: SystemExits are swallowed by Pool, no way to change that
    signal.signal(signal.SIGINT, sigint_handler)
    if getattr(pool, '_processes') > 1:
        try:
            #   Use map_async and get with a large timeout
            #   to allow for KeyboardInterrupts to be caught
            #   and handled with the try/except
            pass
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
            raise SystemExit('\nkilled')
        else:
            pool.join()
    #   Otherwise, skip the pool and use the standard map() to keep life easy
    else:
        #   Clean up the pool
        pool.close()
        pool.terminate()
        pool.join()
        #   Use standard map
    #   End the program
    logging.debug("Entire program took %s seconds to run",
                  round(time.time() - program_start, 3))
    devnull.close()
    try:
        logfile.close()
    except NameError:
        pass
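
The SIGINT handling used above (ignore SIGINT while the pool forks its workers, restore the handler in the parent, then block with a timeout so a Ctrl-C can actually be delivered) also works as a stand-alone pattern. A minimal sketch, assuming nothing beyond the standard library:

import signal
import time
from multiprocessing import Pool

def work(x):
    time.sleep(0.1)
    return x * x

if __name__ == "__main__":
    # Workers inherit the parent's handlers, so ignore SIGINT before forking...
    original_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
    pool = Pool(processes=4)
    # ...then restore it so the parent still reacts to Ctrl-C.
    signal.signal(signal.SIGINT, original_handler)
    try:
        # get() with a timeout keeps the main thread interruptible
        print(pool.map_async(work, range(20)).get(timeout=60))
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        raise SystemExit('\nkilled')
    pool.close()
    pool.join()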