import os
import time
import concurrent.futures
from concurrent.futures import ProcessPoolExecutor

from nltk.corpus import stopwords, wordnet as wn
from gensim.corpora import Dictionary
from gensim.models.phrases import Phrases, Phraser


def preprocess_text(docs):
    # Split the corpus into one slice per CPU core; the last slice also
    # absorbs the remainder of the division.
    num_task = os.cpu_count() or 1  # cpu_count() can return None
    len_slices = len(docs) // num_task
    remainder_slices = len(docs) % num_task

    texts = []
    stoplist = set(stopwords.words('english'))
    
    # Force NLTK's lazily loaded WordNet corpus to load in the parent process,
    # so the worker processes do not all try to load it at the same time.
    wn.ensure_loaded()
    t_start = time.perf_counter()
    with ProcessPoolExecutor(max_workers=num_task) as executor:

        futures_tokenize = []
        for n in range(num_task):

            # The last slice also takes the remainder of the division.
            upper_bound = (n + 1) * len_slices
            if n == num_task - 1:
                upper_bound = (n + 1) * len_slices + remainder_slices

            print("submitting slice {}: docs[{}:{}]".format(n, n * len_slices, upper_bound))
            futures_tokenize.append(
                executor.submit(preprocess_tokenize,
                                docs[n * len_slices:upper_bound],
                                stoplist))

        # Gather the tokenized slices as the workers finish (completion order,
        # not submission order, so document order is not preserved).
        for future in concurrent.futures.as_completed(futures_tokenize):
            texts += future.result()

    t_stop = time.perf_counter()
    print("removed stopwords and lemmatized in {} s".format(t_stop - t_start))
    # Add bigrams to the documents (only phrases that appear 20 times or more);
    # a second Phrases pass over the output would be needed for trigrams.
    bigram = Phraser(Phrases(texts, min_count=20))
    for idx in range(len(texts)):
        for token in bigram[texts[idx]]:
            if '_' in token:
                # Token is a bigram, add to document.
                texts[idx].append(token)

    print("Done bigrams")
    # Build the token <-> id mapping, then drop tokens that appear in fewer
    # than 30 documents or in more than half of all documents.
    dictionary = Dictionary(texts)
    dictionary.filter_extremes(no_below=30, no_above=0.5)
    # Drop the overly common token "like" if it survived the filtering
    # (indexing token2id directly would raise KeyError if it did not).
    if "like" in dictionary.token2id:
        dictionary.filter_tokens(bad_ids=[dictionary.token2id["like"]])
    # Reserve id 0 for a '_pad_' token, useful when sequences are later padded
    # to a fixed length.
    special_tokens = {'_pad_': 0}
    dictionary.patch_with_special_tokens(special_tokens)

    return texts, dictionary
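

# The worker submitted above, preprocess_tokenize, is defined elsewhere in the
# project. The sketch below only illustrates the contract the executor relies
# on: it receives a slice of raw documents plus the stoplist and returns one
# token list per document. The tokenizer, the WordNet lemmatizer, and the
# length filter here are assumptions, not the project's actual implementation.
def example_preprocess_tokenize(docs_slice, stoplist):
    from nltk.stem import WordNetLemmatizer
    from nltk.tokenize import word_tokenize

    lemmatizer = WordNetLemmatizer()
    texts = []
    for doc in docs_slice:
        tokens = [lemmatizer.lemmatize(tok)
                  for tok in word_tokenize(doc.lower())
                  if tok.isalpha() and tok not in stoplist and len(tok) > 2]
        texts.append(tokens)
    return texts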
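

# Usage sketch, assuming preprocess_tokenize is defined in this module (the
# sketch above shows its assumed shape) and that `raw_docs` holds raw document
# strings. With the thresholds used above (min_count=20, no_below=30) a corpus
# of at least a few thousand documents is expected; on a toy corpus the
# frequency filters would empty the dictionary. The __main__ guard is needed
# because ProcessPoolExecutor re-imports this module in each worker process.
if __name__ == "__main__":
    raw_docs = ["This is the first example document.",
                "Another example document goes here."]
    texts, dictionary = preprocess_text(raw_docs)
    print(len(texts), "documents,", len(dictionary), "tokens in the dictionary")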