Example #1
def test_lock_args():
    """Test overhead of nonblocking threads"""
    try:
        from concurrent.futures import ThreadPoolExecutor
        from threading import RLock
    except ImportError:
        raise SkipTest

    total = 8
    subtotal = 1000

    tqdm.set_lock(RLock())
    with ThreadPoolExecutor(total) as pool:
        sys.stderr.write('block ... ')
        sys.stderr.flush()
        with relative_timer() as time_tqdm:
            res = list(pool.map(worker(subtotal, True), range(total)))
            assert sum(res) == sum(range(total)) + total
        sys.stderr.write('noblock ... ')
        sys.stderr.flush()
        with relative_timer() as time_noblock:
            res = list(pool.map(worker(subtotal, False), range(total)))
            assert sum(res) == sum(range(total)) + total

    assert_performance(0.2, 'noblock', time_noblock(), 'tqdm', time_tqdm())
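
(Note: `worker`, `relative_timer` and `assert_performance` are helpers from the surrounding test module and are not shown. A hypothetical sketch of a compatible `worker` factory, illustrating how `lock_args=(False,)` makes a bar skip a contended lock instead of blocking on it, could look like this.)

from time import sleep
from tqdm import trange

def worker(subtotal, blocking):
    """Hypothetical stand-in: each thread draws its own bar and returns pid + 1."""
    def poll(pid):
        kwargs = {} if blocking else {"lock_args": (False,)}
        for _ in trange(subtotal, position=pid, **kwargs):
            sleep(0)  # yield to the other threads
        return pid + 1
    return poll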
Example #2
def get_available_proxies(proxies, r, thread_num, write_lock):
    """
    检测可用的代理并存储
    :param proxies:
    :param r:
    :param thread_num:
    :param write_lock:
    :return:
    """
    tqdm.set_lock(write_lock)
    for proxy_ip in tqdm(proxies,
                         desc="bar{}".format(thread_num),
                         position=thread_num):
        proxie = {"http": proxy_ip.decode(), "https": proxy_ip.decode()}
        try:
            status_code = requests.get(url="http://www.baidu.com",
                                       timeout=1,
                                       proxies=proxie).status_code
            if status_code == 200:
                logger.info("Available agents:{}".format(proxy_ip))
                r.lpush(config.PROXY_AVAILABLE_66IP_KEY, proxy_ip)
            else:
                logger.info("Not available proxy")
        except:
            logger.info("Not available proxy")
Example #3
def count_words(lock, pos: int, read_q: Queue, write_q: Queue):
    """Count words in a book."""
    # pylint: disable=too-many-locals
    tqdm.set_lock(lock)
    for msg in queuer(read_q):
        with open(msg.data) as stream:
            result = {"books": [], "words": {}}

            soup = BeautifulSoup(stream, "xml")
            book = soup.Tanach.tanach.book
            book_name = str(soup.names.find_all("name")[0].string)
            book_num = int(soup.names.number.string)
            result["books"].append(
                dict(id=book_num, name=book_name, corpus="tanach.us"))

            desc = f"{os.getpid()} COUNT {book_name:<15}"
            for word in tqdm(book.find_all(["w", "q"]),
                             desc=desc,
                             position=pos):
                # NOTE: We ignore nested <x> and keep nested <s>!
                raw = get_word(word)
                clean = T.strip(raw)
                if clean in result["words"]:
                    result["words"][clean]["freq"] += 1
                else:
                    chapter = word.parent.parent["n"]
                    verse = word.parent["n"]
                    ref = f"{book_name} {chapter}:{verse}"
                    result["words"][clean] = dict(book_id=book_num,
                                                  freq=1,
                                                  ref=ref,
                                                  raw=raw)

        write_q.put(Msg("SAVE", result))
Example #4
def _parse_preamble(self):
    '''
    @ParseDirectiveMixin._parse_preamble
    '''
    tqdm.set_lock(parallel.RLock())
    for fmt in self.args.formats:
        mkdir(path.join(self.args.target_parent, fmt))
Example #5
def main():
    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument("--datapath", type=str, default="data")
    parser.add_argument("--data_type", type=str, default="train")
    parser.add_argument("--pilot_version", type=int, choices=[1, 2], default=1)
    parser.add_argument("--processes", type=int, default=4)
    parser.add_argument("--data_nums", type=int, default=64)
    parser.add_argument("--seed", type=int, default=43)
    parser.add_argument("--mode", type=int, choices=[0, 1, 2], default=None)
    parser.add_argument("--SNRdb", type=float, default=None)
    parser.add_argument("--with_pure_y", action='store_true')
    parser.add_argument("--debug", action='store_true')
    args = parser.parse_args()

    H, Htest = read_data(args.datapath)
    using_H = H if args.data_type == "train" else Htest

    generate_data_fix = partial(generate_data, args=args, H=using_H)

    tqdm.set_lock(RLock())
    with Pool(processes=args.processes,
              initializer=tqdm.set_lock,
              initargs=(tqdm.get_lock(), )) as pool:
        [
            pool.map(generate_data_fix,
                     range(args.processes * i, args.processes * (i + 1)))
            for i in range(args.data_nums // args.processes)
        ]
Example #6
def outer_function(**config):
    """Outer function running inner function for each task in input dict"""
    freeze_support()  # for Windows support
    tqdm.set_lock(RLock())

    with concurrent.futures.ThreadPoolExecutor(initializer=tqdm.set_lock,
                                               initargs=(tqdm.get_lock(), ),
                                               max_workers=3) as executor:
        results_list = []
        outer_loop_kwarg = {
            'total': len(config['package']['tasks']),
            'desc': 'Outer',
            'ascii': True,
            'position': len(config['package']['tasks']),
            'leave': True
        }

        with tqdm(**outer_loop_kwarg) as out_progress:
            futuresListComp = [
                executor.submit(inner_function, **node)
                for node in config['package']['tasks']
            ]

            # Update after each completed task
            for future in concurrent.futures.as_completed(futuresListComp):
                out_progress.update()
                results_list.append(future.result())

        return results_list
Example #7
def count_words(lock, pos: int, read_q: Queue, write_q: Queue):
    """Count words in a book."""
    tqdm.set_lock(lock)
    for msg in queuer(read_q):
        result = {"books": [], "words": {}}

        book = BeautifulSoup(Path(msg.data).read_text(), "lxml").find("text")
        book_id = int(book["num"])
        result["books"].append(
            dict(id=book_id, name=book["name"], corpus="chabad.org"))

        desc = f"{os.getpid()} COUNT {book['name']:<15}"
        for line in tqdm(book.find_all("line"), desc=desc, position=pos):
            for raw in get_words(line.string):
                clean = T.strip(raw)
                if not clean:
                    continue

                if clean in result["words"]:
                    result["words"][clean]["freq"] += 1
                else:
                    ref = line["ref"]
                    result["words"][clean] = dict(book_id=book_id,
                                                  freq=1,
                                                  ref=ref,
                                                  raw=raw)

        write_q.put(Msg("SAVE", result))
Example #8
def _parse_preamble(self):
    '''
    @ParseDirectiveMixin._parse_preamble
    '''
    tqdm.set_lock(parallel.RLock())
    for fmt in self.args.formats:
        mkdir(path.join(self.args.target_parent, fmt))
    self.pools.progress.worker_kwargs['pcount'] = len(self.frontier) * len(
        self.args.formats)
Example #9
        def _init(lock, id_queue):
            """Set up tqdm lock and worker process index.

            See https://stackoverflow.com/a/42817946
            Fixes tqdm line position when |files| > terminal-height
            so only |workers| progressbars are shown at a time
            """
            global current_process_position
            current_process_position = id_queue.get()
            tqdm.set_lock(lock)
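
(Based on the docstring and the linked Stack Overflow answer, a sketch of how `_init` could be wired up: each worker pops its bar position from a pre-filled queue, so no more than `workers` bars are on screen at once. `process_one_file` and `run_pool` are hypothetical names, not part of the original code.)

from multiprocessing import Manager, Pool, RLock
from time import sleep
from tqdm import trange

def process_one_file(path):
    # hypothetical per-file work, drawn at this worker's fixed position
    for _ in trange(50, desc=str(path), position=current_process_position,
                    leave=False):
        sleep(0.01)

def run_pool(files, workers=4):
    manager = Manager()
    id_queue = manager.Queue()
    for i in range(workers):
        id_queue.put(i)  # positions 0..workers-1, one per worker
    with Pool(workers, initializer=_init,
              initargs=(RLock(), id_queue)) as pool:
        pool.map(process_one_file, files)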
Example #10
def emit(self, record):
    try:
        msg = self.format(record)
        tqdm.set_lock(Lock())
        tqdm.write(msg)
        self.flush()
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception:
        self.handleError(record)
Example #11
    def runBacktesting(self, position=1, task_text='1/1'):
        """Run the backtest."""
        # Load historical data
        self.loadHistoryData()

        # Choose the data class and callback according to the backtesting mode
        if self.mode == self.BAR_MODE:
            dataClass = VtBarData
            func = self.newBar
        else:
            dataClass = VtTickData
            func = self.newTick

        # self.output(u'Backtesting started')

        self.strategy.inited = True
        self.strategy.onInit()
        # self.output(u'Strategy initialisation finished')

        self.strategy.trading = True
        self.strategy.onStart()
        # self.output(u'Strategy started')

        # self.output(u'Start replaying data')

        # Progress bar
        count = self.dbCursor.count()
        # self.dbCursor.batch_size(16 * 1024)

        from tqdm import tqdm

        total = count - 1
        text = "progresser #{}".format(task_text)
        tqdm.set_lock(progress_lock)
        progress = tqdm(
            total=total,
            position=position,
            desc=text
        )

        for d in self.dbCursor:
            data = dataClass()
            data.__dict__ = d
            func(data)

            if self.firstVolume == 0:
                self.firstVolume = 10000.0 / d['close']
            self.lastPrice = d['close']
            progress.update()
        progress.close()
Example #12
def prepare_dataset(video_path, frame_folder, flow_folder, name, speeds=None):
    tqdm.set_lock(Lock())  # manually set internal lock
    #Step 1, Extract frames and speed
    dataframe_dict = {}
    if not os.path.exists(frame_folder):
        os.makedirs(frame_folder)
    print("Reading the video file")
    video_sk = skvideo.io.vread(video_path)
    print("Extracting the frames")
    for index, frame in enumerate(tqdm(video_sk)):    
        saving_path = os.path.join(frame_folder, str(index)+'.jpg')
        if speeds is None:
            speed = 0
        else:
            speed = speeds[index]
        dataframe_dict[index] = [saving_path, index, speed]
        skvideo.io.vwrite(saving_path, frame)
    
    processed_dataframe = pd.DataFrame.from_dict(dataframe_dict, orient='index')
    processed_dataframe.columns = ['frame_path', 'frame_index', 'speed']
    print("Saving the dataframe")
    processed_dataframe.to_csv(os.path.join(PREPARED_DATA_PATH, name +'_meta.csv'), index=False)
    #Step 2, compute optical flow between frames and average the speed
    flow_dict = {}
    if not os.path.exists(flow_folder):
        os.makedirs(flow_folder)
    print("Computing the optical flow")
    for index in tqdm(range(len(processed_dataframe ) - 1)):
        idx1 = index
        idx2 = index + 1
        frame1 = processed_dataframe.iloc[[idx1]]
        frame2 = processed_dataframe.iloc[[idx2]]

        assert(frame2['frame_index'].values[0] - frame1['frame_index'].values[0] == 1)
        assert(frame2['frame_index'].values[0] > frame1['frame_index'].values[0])

        frame1_path = frame1['frame_path'].values[0]
        frame1_speed = frame1['speed'].values[0]
        frame2_path = frame2['frame_path'].values[0]
        frame2_speed = frame2['speed'].values[0]

        flow = process(frame1_path, frame2_path, SIZE)

        flow_path = os.path.join(flow_folder, str(index) + '.png') 

        cv2.imwrite(flow_path, flow)

        speed = np.mean([frame1_speed, frame2_speed]) 
        flow_dict[index] = [flow_path, speed]

    flow_dataframe = pd.DataFrame.from_dict(flow_dict, orient='index')
    flow_dataframe.columns = ['flow_path', 'speed']
    print("Saving the flow dataframe")
    flow_dataframe.to_csv(os.path.join(PREPARED_DATA_PATH, name +'_flow_meta.csv'), index=False)
Example #13
def test_threadpool():
    """Test concurrent.futures.ThreadPoolExecutor"""
    try:
        from concurrent.futures import ThreadPoolExecutor
        from threading import RLock
    except ImportError:
        raise SkipTest

    tqdm.set_lock(RLock())
    with ThreadPoolExecutor(8) as pool:
        res = list(tqdm(pool.map(incr_bar, range(100)), disable=True))
    assert sum(res) == sum(range(1, 101))
Example #14
def save_database(lock, pos, write_q):
    """Save books, words, and occurrences to the database."""
    # pylint: disable=too-many-locals
    db_create(app, db)
    tqdm.set_lock(lock)

    book_offset = 0
    word_offset = 0
    known_words = {}
    try:
        book_offset = Book.query.add_columns(func.max(Book.id)).first()[-1] or 0
        word_offset = Word.query.add_columns(func.max(Word.id)).first()[-1] or 0
        known_words = {w.hebrew: w.id for w in Word.query.all()}
    except Exception:
        pass

    word_offset += 1
    parser = grammar.Parser()
    for msg in queuer(write_q):
        books = msg.data["books"]
        words = []
        occur = []

        for book in books:
            book["id"] += book_offset

        desc = f"{os.getpid()} SAVE  {books[0]['name']:<15}"
        for clean, word in tqdm(msg.data["words"].items(), desc=desc, position=pos):
            word_id = known_words.get(clean, word_offset)
            if clean not in known_words:  # new word
                parsed = parse_word(parser, word["raw"], clean, word["ref"])
                if not parsed:
                    continue

                parsed["id"] = word_id
                known_words[clean] = word_id
                word_offset += 1
                words.append(parsed)

            occur.append(
                dict(
                    book_id=word["book_id"] + book_offset,
                    word_id=word_id,
                    ref=word["ref"],
                    freq=word["freq"],
                )
            )

        for obj, values in {Book: books, Word: words, Freq: occur}.items():
            if not values:
                continue
            db.engine.execute(obj.__table__.insert().values(values))
Example #15
def save_words(lock, pos, write_q):
    """Save words to the database."""
    # pylint: disable=too-many-locals
    db_create(app, db)
    tqdm.set_lock(lock)

    seen = {}
    parser = grammar.Parser()
    for msg in queuer(write_q):
        book_id = msg.data["num"]
        book_name = msg.data["name"]

        # objects to insert into database
        book = {"id": book_id, "name": book_name}
        words = []
        occur = []

        desc = f"{os.getpid()} SAVE  {book_name:<15}"
        for clean, stats in tqdm(msg.data["words"].items(), desc=desc, position=pos):
            word_id = seen.get(clean, len(seen) + 1)
            raw = stats["raw"]
            if clean not in seen:
                seen[clean] = word_id
                parsed = parser.parse(raw)
                if not "".join(parsed.vowel):  # unvowelized word
                    continue

                syllables = parser.syllabify(parsed)
                words.append(
                    dict(
                        id=word_id,
                        hebrew=clean,
                        shemot=grammar.isshemot(clean),
                        gematria=grammar.gematria(clean),
                        parsed=str(parsed.flat()),
                        syllables=str(syllables),
                        syllen=len(syllables),
                        rules=str(parsed.rules.flat()),
                    )
                )

            occur.append(
                dict(
                    book_id=book_id,
                    word_id=word_id,
                    ref=stats["ref"],
                    freq=stats["freq"],
                )
            )

        for obj, values in {Book: book, Word: words, Freq: occur}.items():
            db.engine.execute(obj.__table__.insert().values(values))
Example #16
def scp_operation(args):
    credentials = Credentials()
    targets = args.hostname.split(",")
    tqdm.set_lock(RLock())
    with Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(), )) as pool:
        pool.starmap(
            scp_process,
            zip(
                targets,
                repeat(credentials),
                repeat(args.filename),
                repeat(args.dst_file_path),
                list(range(len(targets))),
            ),
        )
Example #17
def parallel_program(flash_func, devices, dfu_file):
    '''Parallel programming'''
    queue = queue_.Queue()
    threads = []
    results = []
    tqdm.monitor_interval = 0
    tqdm.set_lock(RLock())

    for pos, dev in enumerate(devices):
        threads.append(
            Thread(target=flash_func, args=[dev, dfu_file, queue, pos]))
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
        results.append(queue.get())
    print('\n' * len(devices))
    return results
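
(A hypothetical `flash_func` matching the `(dev, dfu_file, queue, pos)` interface that `parallel_program` expects; the actual DFU transfer is replaced by a sleep.)

from time import sleep
from tqdm import trange

def flash_func(dev, dfu_file, queue, pos):
    for _ in trange(100, desc="device {}".format(dev), position=pos, leave=False):
        sleep(0.01)  # stand-in for writing one chunk of dfu_file
    queue.put((dev, "ok"))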
Example #18
def get_proxy_info(start_idx, end_idx, thread_n, write_lock):
    tqdm.set_lock(write_lock)
    text = "bar {}".format(thread_n)
    for page in tqdm(range(start_idx,
                           int(end_idx) + 1),
                     desc=text,
                     position=thread_n,
                     leave=False):
        time.sleep(0.5)
        response = requests.get(url=URL_KUAIDAILI.format(str(page)))
        soup = BeautifulSoup(response.content, "lxml")
        tags_aims = soup.select(selector="#list > table > tbody > tr")
        for i in range(len(tags_aims)):
            if tags_aims[i]:
                info_list = list(tags_aims[i].strings)
                proxy_ip = info_list[1] + info_list[3]
                r.lpush("proxy_ips", proxy_ip)
            else:
                pass
        response.close()
Example #19
def spawn_processes(init_fn, read_fn, write_fn, num_readers=1, num_writers=1):
    """Start readers and writers."""
    tqdm.set_lock(RLock())

    write_q = Queue()
    write_fn = partial(write_fn, lock=tqdm.get_lock(), write_q=write_q)
    writers = [
        Process(daemon=True, target=partial(write_fn, pos=i))
        for i in range(num_writers, 0, -1)
    ]

    read_q = Queue()
    read_fn = partial(read_fn, lock=tqdm.get_lock(), read_q=read_q, write_q=write_q)
    readers = [
        Process(daemon=True, target=partial(read_fn, pos=i))
        for i in range(num_readers + 1, num_writers, -1)
    ]

    for p in readers + writers:
        p.start()

    init_fn(read_q)
    notify_and_join(read_q, readers)
    notify_and_join(write_q, writers)
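
(A hedged usage sketch tying this to the reader/writer functions from Examples #3 and #14: `init_fn` only seeds the read queue with files to process. The file names, the `Msg` tag and `seed_queue` itself are assumptions.)

from functools import partial

def seed_queue(paths, read_q):
    for path in paths:
        read_q.put(Msg("COUNT", path))  # count_words reads msg.data as a file path

spawn_processes(partial(seed_queue, ["Genesis.xml", "Exodus.xml"]),
                count_words, save_database,
                num_readers=2, num_writers=1)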
Example #20
    def assign(self, X, no_scale=False, progress=True):
        '''Assign the clustering of new samples using :func:`~PopPUNK.dbscan.assign_samples_dbscan`

        Args:
            X (numpy.array)
                Core and accessory distances
            no_scale (bool)
                Do not scale X

                [default = False]
            progress (bool)
                Show progress bar

                [default = True]
        Returns:
            y (numpy.array)
                Cluster assignments by samples
        '''
        if not self.fitted:
            raise RuntimeError("Trying to assign using an unfitted model")
        else:
            if no_scale:
                scale = np.array([1, 1], dtype=X.dtype)
            else:
                scale = self.scale
            if progress:
                sys.stderr.write("Assigning distances with DBSCAN model\n")

            y = np.zeros(X.shape[0], dtype=int)
            block_size = 5000
            n_blocks = (X.shape[0] - 1) // block_size + 1
            with SharedMemoryManager() as smm:
                shm_X = smm.SharedMemory(size=X.nbytes)
                X_shared_array = np.ndarray(X.shape,
                                            dtype=X.dtype,
                                            buffer=shm_X.buf)
                X_shared_array[:] = X[:]
                X_shared = NumpyShared(name=shm_X.name,
                                       shape=X.shape,
                                       dtype=X.dtype)

                shm_y = smm.SharedMemory(size=y.nbytes)
                y_shared_array = np.ndarray(y.shape,
                                            dtype=y.dtype,
                                            buffer=shm_y.buf)
                y_shared_array[:] = y[:]
                y_shared = NumpyShared(name=shm_y.name,
                                       shape=y.shape,
                                       dtype=y.dtype)

                tqdm.set_lock(RLock())
                process_map(partial(assign_samples,
                                    X=X_shared,
                                    y=y_shared,
                                    model=self,
                                    scale=scale,
                                    chunk_size=block_size,
                                    values=False),
                            range(n_blocks),
                            max_workers=self.threads,
                            chunksize=min(10, max(1,
                                                  n_blocks // self.threads)),
                            disable=not progress)

                y[:] = y_shared_array[:]

        return y
Example #21
from multiprocessing import Pool, RLock, freeze_support
from time import sleep

from tqdm import tqdm, trange

from core.aesthetics import *

print_cyan_bold("my text is very good")


class SomeClass:
    def __init__(self, param1, param2):
        self.param1 = param1
        self.param2 = param2

    def some_function(self):
        for i in range(100):
            print("vad ca se misca foarte bine")


def progresser(n):
    interval = 0.001 / (n + 2)
    total = 5000
    text = "#{}, est. {:<04.2}s".format(n, interval * total)
    for _ in trange(total, desc=text, position=n):
        # do stuff
        sleep(0.1)


if __name__ == '__main__':
    freeze_support()  # for Windows support
    tqdm.set_lock(RLock())  # for managing output contention
    p = Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(), ))
    p.map(progresser, range(2))
Example #22
def run_listener(write_lock):
    # This function is used to solve the error when tqdm is used inside a thread
    # https://github.com/tqdm/tqdm/issues/323
    tqdm.set_lock(write_lock)
    worker_fn()
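
(A hedged sketch of how `run_listener` might be started: it runs in its own thread with a lock shared by every bar-producing thread; `worker_fn` is assumed to be defined elsewhere in the original module.)

from threading import Thread, RLock

write_lock = RLock()
listener = Thread(target=run_listener, args=(write_lock,), daemon=True)
listener.start()
listener.join()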
Example #23
try:
  # python2
  _str = unicode
  _range = xrange
  from StringIO import StringIO
  string_types = (basestring,)
except NameError:
  # python3
  _str = str
  _range = range
  from io import StringIO
  string_types = (str,)
try:
  from threading import RLock
except ImportError:
  tqdm = tqdm_std
else:
  tqdm_std.set_lock(RLock())
  tqdm = partial(tqdm_std, lock_args=(False,))

__author__ = "Casper da Costa-Luis <*****@*****.**>"
__date__ = "2016-2020"
__licence__ = "[MPLv2.0](https://mozilla.org/MPL/2.0/)"
__all__ = ["TERM_WIDTH", "int_cast_or_len", "Max", "fext", "_str", "tqdm",
           "tighten", "check_output", "print_unicode", "StringIO", "Str"]
__copyright__ = ' '.join(("Copyright (c)", __date__, __author__, __licence__))
__license__ = __licence__  # weird foreign language

log = logging.getLogger(__name__)
TERM_WIDTH = _screen_shape_wrapper()(sys.stdout)[0]
if not TERM_WIDTH:
  # non interactive pipe
  TERM_WIDTH = 256
Example #24
def _parse_preamble(self):
    '''
    @ParseDirectiveMixin._parse_preamble
    '''
    tqdm.set_lock(parallel.RLock())
Example #25
def init_aleph(lock=None):
    global api
    api = AlephAPI()
    if lock is not None:
        tqdm.set_lock(lock)
Example #26
def initialize_tqdm():
    tqdm.set_lock(threading.RLock())
Example #27
        tb_logger.attach(
            trainer,
            log_handler=GradsHistHandler(IGTrainer.model),
            event_name=Events.EPOCH_COMPLETED,
        )
        tb_logger.close()

    handler = EarlyStopping(
        patience=5,
        score_function=lambda engine: engine.state.metrics["accuracy"],
        trainer=trainer,
    )
    val_evaluator.add_event_handler(Events.COMPLETED, handler)
    pbar = ProgressBar(bar_format="")
    pbar.attach(trainer, output_transform=lambda x: {"loss": x})
    logging_board(model_name)

    from tqdm import tqdm
    from multiprocessing import Lock

    tqdm.set_lock(Lock())
    trainer.run(loader, max_epochs=5)
    checkpoint_epochs_file = ".".join(
        checkpoint_file.split(".")[:-1]) + ".05.ckpt"
    IGTrainer.save_model(checkpoint_epochs_file)
    trainer.run(loader, max_epochs=10)
    checkpoint_epochs_file = ".".join(
        checkpoint_file.split(".")[:-1]) + ".15.ckpt"
    IGTrainer.save_model(checkpoint_epochs_file)
Example #28
def scrape():
    global filename

    start_time = datetime.now()

    # Building the filename
    filename = str(filename).replace("$DATE",
                                     start_time.strftime("%Y%m%d%H%M%S"))
    search = str(args.search).replace(" ", "")
    if len(search) > 10:
        search = search[0:9]
    filename = str(filename).replace("$SEARCH", search)

    func_args = []
    stats_dict = {}
    if args.engines and len(args.engines) > 0:
        eng = args.engines[0]
        for e in eng:
            try:
                if not (args.exclude and len(args.exclude) > 0
                        and e in args.exclude[0]):
                    func_args.append("{}:{}".format(e, args.search))
                    stats_dict[e] = 0
            except KeyError:
                print(
                    "Error: search engine {} not in the list of supported engines"
                    .format(e))
    else:
        for e in supported_engines.keys():
            if not (args.exclude and len(args.exclude) > 0
                    and e in args.exclude[0]):
                func_args.append("{}:{}".format(e, args.search))
                stats_dict[e] = 0

    # Doing multiprocessing
    units = min((cpu_count() - 1), len(func_args))
    if args.mp_units and args.mp_units > 0:
        units = min(args.mp_units, len(func_args))
    print("search.py started with {} processing units...".format(units))
    freeze_support()

    results = {}
    tqdm.set_lock(RLock())
    with Pool(units,
              initializer=tqdm.set_lock,
              initargs=(tqdm.get_lock(), )) as p:
        results_map = p.map(run_method, func_args)
        results = reduce(lambda a, b: a + b
                         if b is not None else a, results_map)

    stop_time = datetime.now()

    if not args.continuous_write:
        with open(filename, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file,
                                    delimiter=field_delim,
                                    quoting=csv.QUOTE_ALL)
            for r in results:
                write_to_csv(csv_writer, r)

    total = 0
    print("\nReport:")
    print("  Execution time: %s seconds" % (stop_time - start_time))
    print("  Results per engine:")
    for r in results:
        stats_dict[r['engine']] += 1
    for s in stats_dict:
        n = stats_dict[s]
        print("    {}: {}".format(s, str(n)))
        total += n
    print("  Total: {} links written to {}".format(str(total), filename))
Example #29
""" Progress bars in Nuitka.

This is responsible for wrapping the rendering of progress bar and emitting tracing
to the user while it's being displayed.

"""

from nuitka import Tracing
from nuitka.utils.ThreadedExecutor import RLock

try:
    from tqdm import tqdm
except ImportError:
    tqdm = None
else:
    tqdm.set_lock(RLock())


class NuitkaProgessBar(object):
    def __init__(self, stage, total, unit):
        self.stage = stage
        self.total = total
        self.unit = unit

        # No item under work yet.
        self.item = None

        # No progress yet.
        self.progress = 0

        # Render immediately with 0 progress.

def process_collection(collection):
    fid = collection["foreign_id"]
    fid = fid.replace("/", "")
    fname = f"./dataset_components/{fid}.json"
    if os.path.exists(fname):
        return
    try:
        components = calculate_components(collection)
    except AlephException as e:
        print(f"Aleph Error: {fid}: {e}")
        return
    with open(fname, "w+") as fd:
        data = {
            "components_histogram": dict(components),
            "collection": collection,
        }
        fd.write(json.dumps(data))


if __name__ == "__main__":
    init_aleph()
    collections = api.filter_collections("*")
    N = collections.result["total"]
    tqdm.set_lock(mp.RLock())
    with mp.Pool(processes=4, initializer=init_aleph, initargs=(tqdm.get_lock(),)) as p:
        results = p.imap_unordered(process_collection, collections, chunksize=32)
        for _ in tqdm(results, total=N, position=0):
            pass