def build_tokenized_corpus(input_root, tokenizer, output_dir, skip_dirs=False, n_processes=1, wiki_only=False):
    """Tokenize all source files under `input_root` into `output_dir`.

    Gathers the input files via `_gather_files`, tokenizes them (optionally
    across `n_processes` worker processes), and writes the union of all words
    seen to `output_dir/vocab.txt`, one word per line, sorted.

    Args:
        input_root: root directory of the raw corpus.
        tokenizer: tokenizer handed to the per-file workers.
        output_dir: destination directory; created if missing.
        skip_dirs: forwarded to `_gather_files` — presumably skips already
            processed directories; TODO confirm against its definition.
        n_processes: 1 runs in-process, otherwise a multiprocessing pool.
        wiki_only: forwarded to `_gather_files` to restrict the file set.
    """
    if not exists(output_dir):
        makedirs(output_dir)

    all_files = _gather_files(input_root, output_dir, skip_dirs, wiki_only)

    if n_processes == 1:
        # In-process path: tokenize every file here and collect the vocab.
        vocab = build_tokenized_files(tqdm(all_files, ncols=80), input_root, output_dir, tokenizer)
    else:
        vocab = set()
        from multiprocessing import Pool
        with Pool(n_processes) as pool:
            # Split evenly across workers, then batch into work items of at
            # most 500 files so progress updates stay reasonably frequent.
            batches = split(all_files, n_processes)
            batches = flatten_iterable(group(b, 500) for b in batches)
            progress = tqdm(total=len(batches), ncols=80)
            work_items = [[b, input_root, output_dir, tokenizer] for b in batches]
            for partial_vocab in pool.imap_unordered(_build_tokenized_files_t, work_items):
                vocab.update(partial_vocab)
                progress.update(1)
            progress.close()

    # Persist the vocabulary, sorted for deterministic output.
    voc_file = join(output_dir, "vocab.txt")
    with open(voc_file, "w") as f:
        for word in sorted(vocab):
            f.write(word)
            f.write("\n")
def preprocess_par(questions: List, evidence, preprocessor, n_processes=2, chunk_size=200, name=None):
    """Run `preprocessor.preprocess` over `questions`, optionally in parallel.

    Questions are split into chunks of up to `chunk_size`, processed by up to
    `n_processes` workers, and the per-chunk outputs are merged with `+=`.
    `preprocessor.finalize_chunk` is invoked once per chunk of output.

    Args:
        questions: list of questions to preprocess.
        evidence: evidence object forwarded to the preprocessor.
        preprocessor: object exposing `preprocess(questions, evidence)` and
            `finalize_chunk(output)`.
        n_processes: number of worker processes; None means one per CPU.
        chunk_size: maximum questions per work chunk; must be >= 1.
        name: label shown on the progress bar.

    Returns:
        The merged preprocessor output (whatever type `preprocess` returns,
        combined via `+=`).

    Raises:
        ValueError: if `chunk_size` < 1 or `n_processes` is neither None
            nor a positive integer.
    """
    if chunk_size <= 0:
        # Message fixed: the check rejects 0, so the requirement is >= 1.
        raise ValueError("Chunk size must be >= 1, but got %s" % chunk_size)
    if n_processes is not None and n_processes <= 0:
        raise ValueError("n_processes must be >= 1 or None, but got %s" % n_processes)

    if n_processes is None:
        # None is documented as valid but previously crashed in min() below;
        # interpret it as "one worker per CPU".
        from multiprocessing import cpu_count
        n_processes = cpu_count()
    # Never spawn more workers than questions, but keep at least one so an
    # empty question list doesn't produce Pool(0).
    n_processes = max(1, min(len(questions), n_processes))

    if n_processes == 1:
        out = preprocessor.preprocess(tqdm(questions, desc=name, ncols=80), evidence)
        preprocessor.finalize_chunk(out)
        return out
    else:
        from multiprocessing import Pool
        chunks = split(questions, n_processes)
        chunks = flatten_iterable([group(c, chunk_size) for c in chunks])
        print("Processing %d chunks with %d processes" % (len(chunks), n_processes))
        pbar = tqdm(total=len(questions), desc=name, ncols=80)
        lock = Lock()

        def call_back(results):
            # Finalize each chunk as soon as it completes; the lock serializes
            # progress-bar updates across callback invocations.
            preprocessor.finalize_chunk(results[0])
            with lock:
                # FIXME Even with the lock, the progress bar still is jumping around
                pbar.update(results[1])

        with Pool(n_processes) as pool:
            results = [
                pool.apply_async(_preprocess_and_count, [c, evidence, preprocessor], callback=call_back)
                for c in chunks
            ]
            # .get() re-raises any worker exception; [0] is the chunk output.
            results = [r.get()[0] for r in results]
        pbar.close()

        # Merge chunk outputs in submission order.
        output = results[0]
        for r in results[1:]:
            output += r
        return output
def get_evidence_voc(corpus, n_processes=1):
    """Count word occurrences over every document in `corpus`.

    Args:
        corpus: object exposing `list_documents()` and
            `get_document(doc_id, flat=True)` (the latter presumably returns
            a flat token sequence — TODO confirm against the corpus class).
        n_processes: 1 counts in-process, otherwise uses a worker pool with
            `_extract_voc_tuple` over chunks of 10000 document ids.

    Returns:
        collections.Counter mapping word -> occurrence count.
    """
    doc_ids = corpus.list_documents()
    voc = Counter()
    if n_processes == 1:
        for doc in tqdm(doc_ids):
            # BUG FIX: this previously did `voc = corpus.get_document(...)`,
            # discarding all prior counts and returning the last document's
            # raw tokens instead of a Counter. Accumulate counts instead,
            # matching the `voc += v` semantics of the parallel branch.
            voc.update(corpus.get_document(doc, flat=True))
    else:
        from multiprocessing import Pool
        chunks = split(doc_ids, n_processes)
        chunks = flatten_iterable(group(x, 10000) for x in chunks)
        pbar = tqdm(total=len(chunks), ncols=80)
        with Pool(n_processes) as pool:
            for v in pool.imap_unordered(_extract_voc_tuple, [[corpus, c] for c in chunks]):
                voc += v
                pbar.update(1)
        pbar.close()
    return voc