Example #1
def test_lock_args():
    """Test overhead of nonblocking threads"""
    try:
        from concurrent.futures import ThreadPoolExecutor
        from threading import RLock
    except ImportError:
        raise SkipTest

    total = 8
    subtotal = 1000

    tqdm.set_lock(RLock())
    with ThreadPoolExecutor(total) as pool:
        sys.stderr.write('block ... ')
        sys.stderr.flush()
        with relative_timer() as time_tqdm:
            res = list(pool.map(worker(subtotal, True), range(total)))
            assert sum(res) == sum(range(total)) + total
        sys.stderr.write('noblock ... ')
        sys.stderr.flush()
        with relative_timer() as time_noblock:
            res = list(pool.map(worker(subtotal, False), range(total)))
            assert sum(res) == sum(range(total)) + total

    assert_performance(0.2, 'noblock', time_noblock(), 'tqdm', time_tqdm())
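
(Note: `worker`, `relative_timer` and `assert_performance` are helpers from the surrounding test module and are not shown. A hypothetical sketch of a compatible `worker` factory, illustrating how `lock_args=(False,)` makes a bar skip a contended lock instead of blocking on it, could look like this.)

from time import sleep
from tqdm import trange

def worker(subtotal, blocking):
    """Hypothetical stand-in: each thread draws its own bar and returns pid + 1."""
    def poll(pid):
        kwargs = {} if blocking else {"lock_args": (False,)}
        for _ in trange(subtotal, position=pid, **kwargs):
            sleep(0)  # yield to the other threads
        return pid + 1
    return poll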
Example #2
def get_available_proxies(proxies, r, thread_num, write_lock):
    """
    检测可用的代理并存储
    :param proxies:
    :param r:
    :param thread_num:
    :param write_lock:
    :return:
    """
    tqdm.set_lock(write_lock)
    for proxy_ip in tqdm(proxies,
                         desc="bar{}".format(thread_num),
                         position=thread_num):
        proxie = {"http": proxy_ip.decode(), "https": proxy_ip.decode()}
        try:
            status_code = requests.get(url="http://www.baidu.com",
                                       timeout=1,
                                       proxies=proxie).status_code
            if status_code == 200:
                logger.info("Available agents:{}".format(proxy_ip))
                r.lpush(config.PROXY_AVAILABLE_66IP_KEY, proxy_ip)
            else:
                logger.info("Not available proxy")
        except:
            logger.info("Not available proxy")
Example #3
def count_words(lock, pos: int, read_q: Queue, write_q: Queue):
    """Count words in a book."""
    # pylint: disable=too-many-locals
    tqdm.set_lock(lock)
    for msg in queuer(read_q):
        with open(msg.data) as stream:
            result = {"books": [], "words": {}}

            soup = BeautifulSoup(stream, "xml")
            book = soup.Tanach.tanach.book
            book_name = str(soup.names.find_all("name")[0].string)
            book_num = int(soup.names.number.string)
            result["books"].append(
                dict(id=book_num, name=book_name, corpus="tanach.us"))

            desc = f"{os.getpid()} COUNT {book_name:<15}"
            for word in tqdm(book.find_all(["w", "q"]),
                             desc=desc,
                             position=pos):
                # NOTE: We ignore nested <x> and keep nested <s>!
                raw = get_word(word)
                clean = T.strip(raw)
                if clean in result["words"]:
                    result["words"][clean]["freq"] += 1
                else:
                    chapter = word.parent.parent["n"]
                    verse = word.parent["n"]
                    ref = f"{book_name} {chapter}:{verse}"
                    result["words"][clean] = dict(book_id=book_num,
                                                  freq=1,
                                                  ref=ref,
                                                  raw=raw)

        write_q.put(Msg("SAVE", result))
Example #4
def _parse_preamble(self):
    '''
    @ParseDirectiveMixin._parse_preamble
    '''
    tqdm.set_lock(parallel.RLock())
    for fmt in self.args.formats:
        mkdir(path.join(self.args.target_parent, fmt))
Example #5
def main():
    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument("--datapath", type=str, default="data")
    parser.add_argument("--data_type", type=str, default="train")
    parser.add_argument("--pilot_version", type=int, choices=[1, 2], default=1)
    parser.add_argument("--processes", type=int, default=4)
    parser.add_argument("--data_nums", type=int, default=64)
    parser.add_argument("--seed", type=int, default=43)
    parser.add_argument("--mode", type=int, choices=[0, 1, 2], default=None)
    parser.add_argument("--SNRdb", type=float, default=None)
    parser.add_argument("--with_pure_y", action='store_true')
    parser.add_argument("--debug", action='store_true')
    args = parser.parse_args()

    H, Htest = read_data(args.datapath)
    using_H = H if args.data_type == "train" else Htest

    generate_data_fix = partial(generate_data, args=args, H=using_H)

    tqdm.set_lock(RLock())
    with Pool(processes=args.processes,
              initializer=tqdm.set_lock,
              initargs=(tqdm.get_lock(), )) as pool:
        [
            pool.map(generate_data_fix,
                     range(args.processes * i, args.processes * (i + 1)))
            for i in range(args.data_nums // args.processes)
        ]
Example #6
def outer_function(**config):
    """Outer function running inner function for each task in input dict"""
    freeze_support()  # for Windows support
    tqdm.set_lock(RLock())

    with concurrent.futures.ThreadPoolExecutor(initializer=tqdm.set_lock,
                                               initargs=(tqdm.get_lock(), ),
                                               max_workers=3) as executor:
        results_list = []
        outer_loop_kwarg = {
            'total': len(config['package']['tasks']),
            'desc': 'Outer',
            'ascii': True,
            'position': len(config['package']['tasks']),
            'leave': True
        }

        with tqdm(**outer_loop_kwarg) as out_progress:
            futuresListComp = [
                executor.submit(inner_function, **node)
                for node in config['package']['tasks']
            ]

            # Update after each completed task
            for future in concurrent.futures.as_completed(futuresListComp):
                out_progress.update()
                results_list.append(future.result())

        return results_list
Example #7
def count_words(lock, pos: int, read_q: Queue, write_q: Queue):
    """Count words in a book."""
    tqdm.set_lock(lock)
    for msg in queuer(read_q):
        result = {"books": [], "words": {}}

        book = BeautifulSoup(Path(msg.data).read_text(), "lxml").find("text")
        book_id = int(book["num"])
        result["books"].append(
            dict(id=book_id, name=book["name"], corpus="chabad.org"))

        desc = f"{os.getpid()} COUNT {book['name']:<15}"
        for line in tqdm(book.find_all("line"), desc=desc, position=pos):
            for raw in get_words(line.string):
                clean = T.strip(raw)
                if not clean:
                    continue

                if clean in result["words"]:
                    result["words"][clean]["freq"] += 1
                else:
                    ref = line["ref"]
                    result["words"][clean] = dict(book_id=book_id,
                                                  freq=1,
                                                  ref=ref,
                                                  raw=raw)

        write_q.put(Msg("SAVE", result))
Example #8
def _parse_preamble(self):
    '''
    @ParseDirectiveMixin._parse_preamble
    '''
    tqdm.set_lock(parallel.RLock())
    for fmt in self.args.formats:
        mkdir(path.join(self.args.target_parent, fmt))
    self.pools.progress.worker_kwargs['pcount'] = len(self.frontier) * len(
        self.args.formats)
Example #9
        def _init(lock, id_queue):
            """Set up tqdm lock and worker process index.

            See https://stackoverflow.com/a/42817946
            Fixes tqdm line position when |files| > terminal-height
            so only |workers| progressbars are shown at a time
            """
            global current_process_position
            current_process_position = id_queue.get()
            tqdm.set_lock(lock)
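
(Based on the docstring and the linked Stack Overflow answer, a sketch of how `_init` could be wired up: each worker pops its bar position from a pre-filled queue, so no more than `workers` bars are on screen at once. `process_one_file` and `run_pool` are hypothetical names, not part of the original code.)

from multiprocessing import Manager, Pool, RLock
from time import sleep
from tqdm import trange

def process_one_file(path):
    # hypothetical per-file work, drawn at this worker's fixed position
    for _ in trange(50, desc=str(path), position=current_process_position,
                    leave=False):
        sleep(0.01)

def run_pool(files, workers=4):
    manager = Manager()
    id_queue = manager.Queue()
    for i in range(workers):
        id_queue.put(i)  # positions 0..workers-1, one per worker
    with Pool(workers, initializer=_init,
              initargs=(RLock(), id_queue)) as pool:
        pool.map(process_one_file, files)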
Example #10
def emit(self, record):
    try:
        msg = self.format(record)
        tqdm.set_lock(Lock())
        tqdm.write(msg)
        self.flush()
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception:
        self.handleError(record)
Example #11
    def runBacktesting(self, position=1, task_text='1/1'):
        """Run the backtest."""
        # Load historical data
        self.loadHistoryData()

        # Choose the data class and callback according to the backtesting mode
        if self.mode == self.BAR_MODE:
            dataClass = VtBarData
            func = self.newBar
        else:
            dataClass = VtTickData
            func = self.newTick

        # self.output(u'Backtesting started')

        self.strategy.inited = True
        self.strategy.onInit()
        # self.output(u'Strategy initialisation finished')

        self.strategy.trading = True
        self.strategy.onStart()
        # self.output(u'Strategy started')

        # self.output(u'Start replaying data')

        # Progress bar
        count = self.dbCursor.count()
        # self.dbCursor.batch_size(16 * 1024)

        from tqdm import tqdm

        total = count - 1
        text = "progresser #{}".format(task_text)
        tqdm.set_lock(progress_lock)
        progress = tqdm(
            total=total,
            position=position,
            desc=text
        )

        for d in self.dbCursor:
            data = dataClass()
            data.__dict__ = d
            func(data)

            if self.firstVolume == 0:
                self.firstVolume = 10000.0 / d['close']
            self.lastPrice = d['close']
            progress.update()
        progress.close()
Example #12
def prepare_dataset(video_path, frame_folder, flow_folder, name, speeds=None):
    tqdm.set_lock(Lock())  # manually set internal lock
    #Step 1, Extract frames and speed
    dataframe_dict = {}
    if not os.path.exists(frame_folder):
        os.makedirs(frame_folder)
    print("Reading the video file")
    video_sk = skvideo.io.vread(video_path)
    print("Extracting the frames")
    for index, frame in enumerate(tqdm(video_sk)):    
        saving_path = os.path.join(frame_folder, str(index)+'.jpg')
        if speeds is None:
            speed = 0
        else:
            speed = speeds[index]
        dataframe_dict[index] = [saving_path, index, speed]
        skvideo.io.vwrite(saving_path, frame)
    
    processed_dataframe = pd.DataFrame.from_dict(dataframe_dict, orient='index')
    processed_dataframe.columns = ['frame_path', 'frame_index', 'speed']
    print("Saving the dataframe")
    processed_dataframe.to_csv(os.path.join(PREPARED_DATA_PATH, name +'_meta.csv'), index=False)
    #Step 2, compute optical flow between frames and average the speed
    flow_dict = {}
    if not os.path.exists(flow_folder):
        os.makedirs(flow_folder)
    print("Computing the optical flow")
    for index in tqdm(range(len(processed_dataframe ) - 1)):
        idx1 = index
        idx2 = index + 1
        frame1 = processed_dataframe.iloc[[idx1]]
        frame2 = processed_dataframe.iloc[[idx2]]

        assert(frame2['frame_index'].values[0] - frame1['frame_index'].values[0] == 1)
        assert(frame2['frame_index'].values[0] > frame1['frame_index'].values[0])

        frame1_path = frame1['frame_path'].values[0]
        frame1_speed = frame1['speed'].values[0]
        frame2_path = frame2['frame_path'].values[0]
        frame2_speed = frame2['speed'].values[0]

        flow = process(frame1_path, frame2_path, SIZE)

        flow_path = os.path.join(flow_folder, str(index) + '.png') 

        cv2.imwrite(flow_path, flow)

        speed = np.mean([frame1_speed, frame2_speed]) 
        flow_dict[index] = [flow_path, speed]

    flow_dataframe = pd.DataFrame.from_dict(flow_dict, orient='index')
    flow_dataframe.columns = ['flow_path', 'speed']
    print("Saving the flow dataframe")
    flow_dataframe.to_csv(os.path.join(PREPARED_DATA_PATH, name +'_flow_meta.csv'), index=False)
Example #13
def test_threadpool():
    """Test concurrent.futures.ThreadPoolExecutor"""
    try:
        from concurrent.futures import ThreadPoolExecutor
        from threading import RLock
    except ImportError:
        raise SkipTest

    tqdm.set_lock(RLock())
    with ThreadPoolExecutor(8) as pool:
        res = list(tqdm(pool.map(incr_bar, range(100)), disable=True))
    assert sum(res) == sum(range(1, 101))
Example #14
def save_database(lock, pos, write_q):
    """Save books, words, and occurrences to the database."""
    # pylint: disable=too-many-locals
    db_create(app, db)
    tqdm.set_lock(lock)

    book_offset = 0
    word_offset = 0
    known_words = {}
    try:
        book_offset = Book.query.add_columns(func.max(Book.id)).first()[-1] or 0
        word_offset = Word.query.add_columns(func.max(Word.id)).first()[-1] or 0
        known_words = {w.hebrew: w.id for w in Word.query.all()}
    except Exception:
        pass

    word_offset += 1
    parser = grammar.Parser()
    for msg in queuer(write_q):
        books = msg.data["books"]
        words = []
        occur = []

        for book in books:
            book["id"] += book_offset

        desc = f"{os.getpid()} SAVE  {books[0]['name']:<15}"
        for clean, word in tqdm(msg.data["words"].items(), desc=desc, position=pos):
            word_id = known_words.get(clean, word_offset)
            if clean not in known_words:  # new word
                parsed = parse_word(parser, word["raw"], clean, word["ref"])
                if not parsed:
                    continue

                parsed["id"] = word_id
                known_words[clean] = word_id
                word_offset += 1
                words.append(parsed)

            occur.append(
                dict(
                    book_id=word["book_id"] + book_offset,
                    word_id=word_id,
                    ref=word["ref"],
                    freq=word["freq"],
                )
            )

        for obj, values in {Book: books, Word: words, Freq: occur}.items():
            if not values:
                continue
            db.engine.execute(obj.__table__.insert().values(values))
Example #15
def save_words(lock, pos, write_q):
    """Save words to the database."""
    # pylint: disable=too-many-locals
    db_create(app, db)
    tqdm.set_lock(lock)

    seen = {}
    parser = grammar.Parser()
    for msg in queuer(write_q):
        book_id = msg.data["num"]
        book_name = msg.data["name"]

        # objects to insert into database
        book = {"id": book_id, "name": book_name}
        words = []
        occur = []

        desc = f"{os.getpid()} SAVE  {book_name:<15}"
        for clean, stats in tqdm(msg.data["words"].items(), desc=desc, position=pos):
            word_id = seen.get(clean, len(seen) + 1)
            raw = stats["raw"]
            if clean not in seen:
                seen[clean] = word_id
                parsed = parser.parse(raw)
                if not "".join(parsed.vowel):  # unvowelized word
                    continue

                syllables = parser.syllabify(parsed)
                words.append(
                    dict(
                        id=word_id,
                        hebrew=clean,
                        shemot=grammar.isshemot(clean),
                        gematria=grammar.gematria(clean),
                        parsed=str(parsed.flat()),
                        syllables=str(syllables),
                        syllen=len(syllables),
                        rules=str(parsed.rules.flat()),
                    )
                )

            occur.append(
                dict(
                    book_id=book_id,
                    word_id=word_id,
                    ref=stats["ref"],
                    freq=stats["freq"],
                )
            )

        for obj, values in {Book: book, Word: words, Freq: occur}.items():
            db.engine.execute(obj.__table__.insert().values(values))
Example #16
def scp_operation(args):
    credentials = Credentials()
    targets = args.hostname.split(",")
    tqdm.set_lock(RLock())
    with Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(), )) as pool:
        pool.starmap(
            scp_process,
            zip(
                targets,
                repeat(credentials),
                repeat(args.filename),
                repeat(args.dst_file_path),
                list(range(len(targets))),
            ),
        )
Example #17
def parallel_program(flash_func, devices, dfu_file):
    '''Parallel programming'''
    queue = queue_.Queue()
    threads = []
    results = []
    tqdm.monitor_interval = 0
    tqdm.set_lock(RLock())

    for pos, dev in enumerate(devices):
        threads.append(
            Thread(target=flash_func, args=[dev, dfu_file, queue, pos]))
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
        results.append(queue.get())
    print('\n' * len(devices))
    return results
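
(A hypothetical `flash_func` matching the `(dev, dfu_file, queue, pos)` interface that `parallel_program` expects; the actual DFU transfer is replaced by a sleep.)

from time import sleep
from tqdm import trange

def flash_func(dev, dfu_file, queue, pos):
    for _ in trange(100, desc="device {}".format(dev), position=pos, leave=False):
        sleep(0.01)  # stand-in for writing one chunk of dfu_file
    queue.put((dev, "ok"))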
Example #18
def get_proxy_info(start_idx, end_idx, thread_n, write_lock):
    tqdm.set_lock(write_lock)
    text = "bar {}".format(thread_n)
    for page in tqdm(range(start_idx,
                           int(end_idx) + 1),
                     desc=text,
                     position=thread_n,
                     leave=False):
        time.sleep(0.5)
        response = requests.get(url=URL_KUAIDAILI.format(str(page)))
        soup = BeautifulSoup(response.content, "lxml")
        tags_aims = soup.select(selector="#list > table > tbody > tr")
        for i in range(len(tags_aims)):
            if tags_aims[i]:
                info_list = list(tags_aims[i].strings)
                proxy_ip = info_list[1] + info_list[3]
                r.lpush("proxy_ips", proxy_ip)
            else:
                pass
        response.close()
Example #19
def spawn_processes(init_fn, read_fn, write_fn, num_readers=1, num_writers=1):
    """Start readers and writers."""
    tqdm.set_lock(RLock())

    write_q = Queue()
    write_fn = partial(write_fn, lock=tqdm.get_lock(), write_q=write_q)
    writers = [
        Process(daemon=True, target=partial(write_fn, pos=i))
        for i in range(num_writers, 0, -1)
    ]

    read_q = Queue()
    read_fn = partial(read_fn, lock=tqdm.get_lock(), read_q=read_q, write_q=write_q)
    readers = [
        Process(daemon=True, target=partial(read_fn, pos=i))
        for i in range(num_readers + 1, num_writers, -1)
    ]

    for p in readers + writers:
        p.start()

    init_fn(read_q)
    notify_and_join(read_q, readers)
    notify_and_join(write_q, writers)
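
(A hedged usage sketch tying this to the reader/writer functions from Examples #3 and #14: `init_fn` only seeds the read queue with files to process. The file names, the `Msg` tag and `seed_queue` itself are assumptions.)

from functools import partial

def seed_queue(paths, read_q):
    for path in paths:
        read_q.put(Msg("COUNT", path))  # count_words reads msg.data as a file path

spawn_processes(partial(seed_queue, ["Genesis.xml", "Exodus.xml"]),
                count_words, save_database,
                num_readers=2, num_writers=1)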
Example #20
    def assign(self, X, no_scale=False, progress=True):
        '''Assign the clustering of new samples using :func:`~PopPUNK.dbscan.assign_samples_dbscan`

        Args:
            X (numpy.array)
                Core and accessory distances
            no_scale (bool)
                Do not scale X

                [default = False]
            progress (bool)
                Show progress bar

                [default = True]
        Returns:
            y (numpy.array)
                Cluster assignments by samples
        '''
        if not self.fitted:
            raise RuntimeError("Trying to assign using an unfitted model")
        else:
            if no_scale:
                scale = np.array([1, 1], dtype=X.dtype)
            else:
                scale = self.scale
            if progress:
                sys.stderr.write("Assigning distances with DBSCAN model\n")

            y = np.zeros(X.shape[0], dtype=int)
            block_size = 5000
            n_blocks = (X.shape[0] - 1) // block_size + 1
            with SharedMemoryManager() as smm:
                shm_X = smm.SharedMemory(size=X.nbytes)
                X_shared_array = np.ndarray(X.shape,
                                            dtype=X.dtype,
                                            buffer=shm_X.buf)
                X_shared_array[:] = X[:]
                X_shared = NumpyShared(name=shm_X.name,
                                       shape=X.shape,
                                       dtype=X.dtype)

                shm_y = smm.SharedMemory(size=y.nbytes)
                y_shared_array = np.ndarray(y.shape,
                                            dtype=y.dtype,
                                            buffer=shm_y.buf)
                y_shared_array[:] = y[:]
                y_shared = NumpyShared(name=shm_y.name,
                                       shape=y.shape,
                                       dtype=y.dtype)

                tqdm.set_lock(RLock())
                process_map(partial(assign_samples,
                                    X=X_shared,
                                    y=y_shared,
                                    model=self,
                                    scale=scale,
                                    chunk_size=block_size,
                                    values=False),
                            range(n_blocks),
                            max_workers=self.threads,
                            chunksize=min(10, max(1,
                                                  n_blocks // self.threads)),
                            disable=not progress)

                y[:] = y_shared_array[:]

        return y
Example #21
from multiprocessing import Pool, RLock, freeze_support
from time import sleep

from tqdm import tqdm, trange

from core.aesthetics import *

print_cyan_bold("my text is very good")


class SomeClass:
    def __init__(self, param1, param2):
        self.param1 = param1
        self.param2 = param2

    def some_function(self):
        for i in range(100):
            print("vad ca se misca foarte bine")


def progresser(n):
    interval = 0.001 / (n + 2)
    total = 5000
    text = "#{}, est. {:<04.2}s".format(n, interval * total)
    for _ in trange(total, desc=text, position=n):
        # do stuff
        sleep(0.1)


if __name__ == '__main__':
    freeze_support()  # for Windows support
    tqdm.set_lock(RLock())  # for managing output contention
    p = Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(), ))
    p.map(progresser, range(2))
Example #22
def run_listener(write_lock):
    # This function is used to solve the error when tqdm is used inside a thread
    # https://github.com/tqdm/tqdm/issues/323
    tqdm.set_lock(write_lock)
    worker_fn()
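
(A hedged sketch of how `run_listener` might be started: it runs in its own thread with a lock shared by every bar-producing thread; `worker_fn` is assumed to be defined elsewhere in the original module.)

from threading import Thread, RLock

write_lock = RLock()
listener = Thread(target=run_listener, args=(write_lock,), daemon=True)
listener.start()
listener.join()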
Example #23
try:
  # python2
  _str = unicode
  _range = xrange
  from StringIO import StringIO
  string_types = (basestring,)
except NameError:
  # python3
  _str = str
  _range = range
  from io import StringIO
  string_types = (str,)
try:
  from threading import RLock
except ImportError:
  tqdm = tqdm_std
else:
  tqdm_std.set_lock(RLock())
  tqdm = partial(tqdm_std, lock_args=(False,))

__author__ = "Casper da Costa-Luis <*****@*****.**>"
__date__ = "2016-2020"
__licence__ = "[MPLv2.0](https://mozilla.org/MPL/2.0/)"
__all__ = ["TERM_WIDTH", "int_cast_or_len", "Max", "fext", "_str", "tqdm",
           "tighten", "check_output", "print_unicode", "StringIO", "Str"]
__copyright__ = ' '.join(("Copyright (c)", __date__, __author__, __licence__))
__license__ = __licence__  # weird foreign language

log = logging.getLogger(__name__)
TERM_WIDTH = _screen_shape_wrapper()(sys.stdout)[0]
if not TERM_WIDTH:
  # non interactive pipe
  TERM_WIDTH = 256
Example #24
def _parse_preamble(self):
    '''
    @ParseDirectiveMixin._parse_preamble
    '''
    tqdm.set_lock(parallel.RLock())
Example #25
def init_aleph(lock=None):
    global api
    api = AlephAPI()
    if lock is not None:
        tqdm.set_lock(lock)
Example #26
def initialize_tqdm():
    tqdm.set_lock(threading.RLock())
Example #27
        tb_logger.attach(
            trainer,
            log_handler=GradsHistHandler(IGTrainer.model),
            event_name=Events.EPOCH_COMPLETED,
        )
        tb_logger.close()

    handler = EarlyStopping(
        patience=5,
        score_function=lambda engine: engine.state.metrics["accuracy"],
        trainer=trainer,
    )
    val_evaluator.add_event_handler(Events.COMPLETED, handler)
    pbar = ProgressBar(bar_format="")
    pbar.attach(trainer, output_transform=lambda x: {"loss": x})
    logging_board(model_name)

    from tqdm import tqdm
    from multiprocessing import Lock

    tqdm.set_lock(Lock())
    trainer.run(loader, max_epochs=5)
    checkpoint_epochs_file = ".".join(
        checkpoint_file.split(".")[:-1]) + ".05.ckpt"
    IGTrainer.save_model(checkpoint_epochs_file)
    trainer.run(loader, max_epochs=10)
    checkpoint_epochs_file = ".".join(
        checkpoint_file.split(".")[:-1]) + ".15.ckpt"
    IGTrainer.save_model(checkpoint_epochs_file)
Example #28
def scrape():
    global filename

    start_time = datetime.now()

    # Building the filename
    filename = str(filename).replace("$DATE",
                                     start_time.strftime("%Y%m%d%H%M%S"))
    search = str(args.search).replace(" ", "")
    if len(search) > 10:
        search = search[0:9]
    filename = str(filename).replace("$SEARCH", search)

    func_args = []
    stats_dict = {}
    if args.engines and len(args.engines) > 0:
        eng = args.engines[0]
        for e in eng:
            try:
                if not (args.exclude and len(args.exclude) > 0
                        and e in args.exclude[0]):
                    func_args.append("{}:{}".format(e, args.search))
                    stats_dict[e] = 0
            except KeyError:
                print(
                    "Error: search engine {} not in the list of supported engines"
                    .format(e))
    else:
        for e in supported_engines.keys():
            if not (args.exclude and len(args.exclude) > 0
                    and e in args.exclude[0]):
                func_args.append("{}:{}".format(e, args.search))
                stats_dict[e] = 0

    # Doing multiprocessing
    units = min((cpu_count() - 1), len(func_args))
    if args.mp_units and args.mp_units > 0:
        units = min(args.mp_units, len(func_args))
    print("search.py started with {} processing units...".format(units))
    freeze_support()

    results = {}
    tqdm.set_lock(RLock())
    with Pool(units,
              initializer=tqdm.set_lock,
              initargs=(tqdm.get_lock(), )) as p:
        results_map = p.map(run_method, func_args)
        results = reduce(lambda a, b: a + b
                         if b is not None else a, results_map)

    stop_time = datetime.now()

    if not args.continuous_write:
        with open(filename, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file,
                                    delimiter=field_delim,
                                    quoting=csv.QUOTE_ALL)
            for r in results:
                write_to_csv(csv_writer, r)

    total = 0
    print("\nReport:")
    print("  Execution time: %s seconds" % (stop_time - start_time))
    print("  Results per engine:")
    for r in results:
        stats_dict[r['engine']] += 1
    for s in stats_dict:
        n = stats_dict[s]
        print("    {}: {}".format(s, str(n)))
        total += n
    print("  Total: {} links written to {}".format(str(total), filename))
Example #29
""" Progress bars in Nuitka.

This is responsible for wrapping the rendering of progress bar and emitting tracing
to the user while it's being displayed.

"""

from nuitka import Tracing
from nuitka.utils.ThreadedExecutor import RLock

try:
    from tqdm import tqdm
except ImportError:
    tqdm = None
else:
    tqdm.set_lock(RLock())


class NuitkaProgessBar(object):
    def __init__(self, stage, total, unit):
        self.stage = stage
        self.total = total
        self.unit = unit

        # No item under work yet.
        self.item = None

        # No progress yet.
        self.progress = 0

        # Render immediately with 0 progress.

def process_collection(collection):
    fid = collection["foreign_id"]
    fid = fid.replace("/", "")
    fname = f"./dataset_components/{fid}.json"
    if os.path.exists(fname):
        return
    try:
        components = calculate_components(collection)
    except AlephException as e:
        print(f"Aleph Error: {fid}: {e}")
        return
    with open(fname, "w+") as fd:
        data = {
            "components_histogram": dict(components),
            "collection": collection,
        }
        fd.write(json.dumps(data))


if __name__ == "__main__":
    init_aleph()
    collections = api.filter_collections("*")
    N = collections.result["total"]
    tqdm.set_lock(mp.RLock())
    with mp.Pool(processes=4, initializer=init_aleph, initargs=(tqdm.get_lock(),)) as p:
        results = p.imap_unordered(process_collection, collections, chunksize=32)
        for _ in tqdm(results, total=N, position=0):
            pass