def extract_jaccard_ngrams_word(tweet1, tweet2, threshold, ngram_num):
    """Extract pairs of similar (but not identical) word n-grams from two tweets.

    N-gram lengths are tried from ``ngram_num`` down to 5, so the longest
    n-grams (those carrying the most context) are extracted first.  For each
    n-gram of ``tweet1`` the closest n-gram of ``tweet2`` is selected; the
    pair is kept when its Jaccard distance is strictly between 0 and the
    scaled threshold ``threshold / (ngram_num * 0.1)``.

    Args:
        tweet1: Object providing ``tokenize()`` and ``strip_out(text)``.
        tweet2: Object providing ``tokenize()`` and ``strip_out(text)``.
        threshold: Base distance threshold, scaled by ``ngram_num`` as above.
        ngram_num: Longest n-gram length to try.  Values below 5 yield an
            empty result.

    Returns:
        A list of unique ``(source_filter(), target_filter())`` tuples.  The
        n-gram with more ``oov_words()`` supplies the first element, the
        other one the second.

    Side effects:
        After every accepted pair, ``strip_out`` is called on ``tweet1`` and
        ``tweet2`` with the ``clean_text`` of the respective matched n-gram.
    """
    min_n = 5  # shortest n-gram length considered
    if ngram_num < min_n:
        # No n-gram length to try.  Returning here also avoids the division
        # by zero that the threshold scaling would hit for ngram_num == 0.
        return []

    new_thres = threshold / (ngram_num * 0.1)
    result = []

    for n in range(ngram_num, min_n - 1, -1):
        # Tokenize again on every pass: strip_out() may have been called on
        # the tweets during the previous (longer) n-gram pass.
        tokens_a, tokens_b = tweet1.tokenize(), tweet2.tokenize()

        # Materialize tweet2's n-grams once per n.  `ngrams` may hand back a
        # one-shot iterator (nltk.util.ngrams does); re-iterating it for each
        # n-gram of tweet1 would silently compare only the first one.
        # NOTE(review): set() over the joined string is a set of *characters*,
        # so the distance is character-level even though the n-grams are
        # word-level -- confirm this is intended.
        candidates_b = []
        for ngram_b in ngrams(tokens_b, n):
            joint_b = " ".join(ngram_b)
            candidates_b.append((joint_b, set(joint_b)))

        for ngram_a in ngrams(tokens_a, n):
            joint_a = " ".join(ngram_a)
            set_a = set(joint_a)

            # Closest tweet2 n-gram; ties resolve to the earliest candidate.
            best = min(
                ((jaccard_distance(set_a, set_b), joint_b)
                 for joint_b, set_b in candidates_b),
                key=lambda scored: scored[0],
                default=None,
            )
            if best is None:
                continue

            distance, best_b = best
            # A distance of exactly 0 is skipped, as is anything at or above
            # the scaled threshold.
            if distance == 0 or not distance < new_thres:
                continue

            match_a = Tweet(joint_a)
            match_b = Tweet(best_b)
            match_a.filter("*")
            match_b.filter("*")

            # The n-gram with more OOV words goes first (source); the other
            # one goes second (target).
            if match_a.oov_words() > match_b.oov_words():
                bi_combination = match_a.source_filter(), match_b.target_filter()
            else:
                bi_combination = match_b.source_filter(), match_a.target_filter()

            if bi_combination not in result:
                result.append(bi_combination)

            tweet1.strip_out(match_a.clean_text)
            tweet2.strip_out(match_b.clean_text)

    return result
from tweet import Tweet

# Manual check: compare the lengths of a full text and a copy of it that is
# cut short and ends in an ellipsis character.
s = (
    "Aurah : “ Ves a Miriam bipolar ? ” Verdeliss : “ Las enfermedades "
    "mentales las tiene que diagnosticar un profesional , no yo , "
    "así que no ” Aurah hija , te cubres de gloria"
)
z = (
    "Aurah : “ Ves a Miriam bipolar ? ” Verdeliss : “ Las enfermedades "
    "mentales las tiene que diagnosticar un profesional , no yo , …"
)

# Wrap each text in a Tweet and run its filter with the "*" argument.
a = Tweet(s)
a.filter("*")

b = Tweet(z)
b.filter("*")

# Show both lengths, then whether the full text exceeds the truncated one
# by fewer than 4.
print(a.tweet_len())
print(b.tweet_len())
result = (a.tweet_len() - b.tweet_len()) < 4
print(result)