コード例 #1
0
    def test_intersection(self):
        """
        Verify intersection() against hand-written expected multisets.
        """
        # Each row: (left operand, right operand, expected intersection).
        # The first pair shares the key "a"; the second pair shares no keys.
        cases = (
            (
                {"a": 10, "b": 20},
                {"a": 1, "c": 2},
                {"a": 1, "b": 0, "c": 0},
            ),
            (
                {"a": 10, "b": 20},
                {"x": 1, "y": 2},
                {"a": 0, "b": 0, "x": 0, "y": 0},
            ),
        )
        for left_counts, right_counts, expected_counts in cases:
            left = Multiset(left_counts)
            right = Multiset(right_counts)
            expected = Multiset(expected_counts)
            self.assertEqual(left.intersection(right), expected)
コード例 #2
0
ファイル: factoring.py プロジェクト: dvihaan/Python-Beginners
def GCF(x, y):
    """Return the product of the factors common to ``x`` and ``y``.

    The values returned by ``primefactors`` for each argument are wrapped in
    a ``Multiset``; every element yielded by iterating the intersection of
    the two multisets is multiplied into the result. The result is 1 when
    the intersection yields nothing.
    """
    common = Multiset(primefactors(x)).intersection(Multiset(primefactors(y)))
    product = 1
    for factor in common:
        product *= factor
    return product
コード例 #3
0
class NGramms(object):
    """Collection of n-grams kept in a multiset-like container.

    An instance is built either from a sequence of words, which is cut into
    consecutive windows of ``n`` items, or from an already-built container
    passed as ``ngramms``.
    """

    def __init__(self, words=None, n=None, ngramms=None):
        """Initialise from ``words`` and ``n``, or from a ready container.

        :param words: sequence to slice into n-grams; required (together
            with ``n``) when ``ngramms`` is not given.
        :param n: window length used for slicing ``words``.
        :param ngramms: pre-built container; when not ``None`` it is stored
            as-is and no slicing takes place.
        """
        # Set ``words`` on every construction path so the attribute always
        # exists, including for instances produced by ``intersection``.
        self.words = words
        if ngramms is not None:
            self.ngramms = ngramms
            return
        # A sequence of length L contains L - n + 1 windows of length n;
        # the "+ 1" keeps the final window from being dropped.
        self.ngramms = Multiset(
            [NGramm(words[i:i + n]) for i in range(len(words) - n + 1)])

    def __str__(self):
        """Return the unicode representation encoded as UTF-8."""
        # NOTE: relies on the Python 2 ``unicode`` builtin.
        return unicode(self).encode('utf-8')

    def __unicode__(self):
        """Return the n-grams as double-quoted items joined by ``', '``."""
        quoted = (u'"{}"'.format(unicode(s)) for s in self.ngramms)
        return u', '.join(quoted)

    def __len__(self):
        """Return ``len()`` of the underlying container."""
        return len(self.ngramms)

    def intersection(self, other):
        """Return a new ``NGramms`` wrapping the containers' intersection.

        :param other: another ``NGramms`` instance.
        """
        both = self.ngramms.intersection(other.ngramms)
        return NGramms(ngramms=both)

    def count(self):
        """Return the sum of ``self.ngramms[x]`` over the container's items."""
        return sum(self.ngramms[x] for x in self.ngramms)
コード例 #4
0
    def extract(self, question: str, doc: str) -> float:
        """Score the bigram overlap between ``question`` and ``doc``.

        Both texts are tokenized with ``self.tokenizer``, their bigrams are
        collected into multisets, and the values of the multisets'
        intersection are summed.

        :param question: question text.
        :param doc: document text.
        :return: the summed overlap; when ``self.normalized`` is truthy it
            is divided by the number of question tokens (0.0 if the question
            produced no tokens).
        """
        tokenized_question = self.tokenizer.tokenize(question)
        tokenized_doc = self.tokenizer.tokenize(doc)

        question_bigrams = Multiset(nltk.bigrams(tokenized_question))
        doc_bigrams = Multiset(nltk.bigrams(tokenized_doc))

        overlap = sum(question_bigrams.intersection(doc_bigrams).values())

        if self.normalized:
            question_length = len(tokenized_question)
            # A question without tokens has no bigrams, so the overlap is
            # necessarily 0; return it directly instead of dividing by zero.
            if question_length == 0:
                return 0.0
            overlap /= question_length

        return overlap
コード例 #5
0
def jaccard_index(a: multiset.Multiset, b: multiset.Multiset) -> float:
    """Return ``len(a.intersection(b)) / len(a.union(b))``.

    When the union has length zero the ratio is undefined and ``np.nan`` is
    returned instead.
    """
    shared_size = len(a.intersection(b))
    combined_size = len(a.union(b))
    if not combined_size:
        return np.nan
    return shared_size / combined_size