Example #1
    def test_bigrams(self):
        # Note: Tokens look weird after being analyzed probably due to
        # the stemmer. We could write a bunch of code to "undo" some
        # of the excessive stemming, but it's probably an exercise in
        # futility. Ergo the tests look a little odd. e.g. "youtub"

        # One word a bigram does not make
        assert compute_grams(u'youtube') == []

        # Two words is the minimum number to create a bigram
        assert(
            sorted(compute_grams(u'youtube crash')) ==
            ['crash youtube']
            )

        # Three words create two bigrams
        assert(
            sorted(compute_grams(u'youtube crash flash')) ==
            ['crash flash', 'crash youtube']
            )

        # Four words create three bigrams
        assert(
            sorted(compute_grams(u'youtube crash flash bridge')) ==
            ['bridge flash', 'crash flash', 'crash youtube']
            )

        # Nix duplicate bigrams
        assert(
            sorted(compute_grams(u'youtube crash youtube flash')) ==
            ['crash youtube', 'flash youtube']
            )
Example #2
    def test_parsing(self):
        # stop words are removed
        eq_(compute_grams(u'i me him her'), [])

        # capital letters don't matter
        eq_(compute_grams(u'I ME HIM HER'), [])

        # punctuation nixed
        eq_(compute_grams(u'i, me, him, her'), [])
Example #3
    def test_parsing(self):
        # stop words are removed
        assert compute_grams(u'i me him her') == []

        # capital letters don't matter
        assert compute_grams(u'I ME HIM HER') == []

        # punctuation nixed
        assert compute_grams(u'i, me, him, her') == []
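
Taken together, the tests above pin down the observable behaviour of compute_grams: lowercase the input, strip punctuation, drop English stop words, stem what is left, pair each remaining token with its neighbour, sort the two stems inside each bigram alphabetically, and discard duplicate bigrams. The project's real implementation runs the text through its English analyzer (stop words plus a stemmer, as the comments above note); what follows is only a minimal sketch of that behaviour, with a toy stop-word list and no stemming, so it reproduces the structure the tests check rather than the exact stemmed tokens. The name compute_grams_sketch and the STOPWORDS set are illustrative, not taken from the source.

import re

# Tiny illustrative stop-word list; the real analyzer uses a full English
# stop-word list plus a stemmer, which is why the expected tokens in the
# tests above look stemmed and slightly odd.
STOPWORDS = {'i', 'me', 'him', 'her', 'the', 'is', 'a', 'an', 'and',
             'of', 'to', 'so', 'or', 'not', 'at', 'in'}


def compute_grams_sketch(text):
    """Return deduplicated, internally sorted bigrams for ``text``.

    Hypothetical stand-in for compute_grams: it skips stemming, so the
    output will not match the project's analyzer token for token, but it
    follows the same bigram rules the tests above exercise.
    """
    # Lowercase, drop punctuation, keep word-ish tokens (apostrophes kept).
    tokens = re.findall(r"[a-z0-9']+", text.lower())
    tokens = [tok for tok in tokens if tok not in STOPWORDS]

    grams = set()
    for first, second in zip(tokens, tokens[1:]):
        # Each bigram stores its two tokens in alphabetical order, e.g.
        # ('youtube', 'crash') becomes 'crash youtube'.
        grams.add(' '.join(sorted([first, second])))
    return list(grams)


print(sorted(compute_grams_sketch(u'youtube crash flash')))
# -> ['crash flash', 'crash youtube']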
Example #4
    def test_basic(self):
        test_data = [
            ('The quick brown fox', [u'brown quick', u'brown fox']),
            ('the latest update disables the New tab function', [
                u'disables new', u'function tab', u'new tab', u'latest update',
                u'disables update'
            ]),
            ('why is firefox so damn slow???? many tabs load slow or not at '
             'all!', [u'load tabs', u'load slow', u'slow tabs', u'damn slow']),
            ("I'm one of the guys that likes to try Firefox ahead of the "
             'herd... usually I use Nightly, but then a while back my '
             'favorite add-on, TabMixPlus stopped working because Firefox '
             "redid something in the code. \"No problem,\" says I to myself, "
             "I'll just use Aurora until they get it fixed.", [
                 u'add-on favorite', u'add-on tabmixplus', u'ahead herd',
                 u'ahead try', u'aurora fixed', u'aurora use', u'code problem',
                 u'code redid', u'favorite nightly', u"guys i'm",
                 u'guys likes', u'herd usually', u"i'll just", u"i'll myself",
                 u'just use', u'likes try', u'myself says', u'nightly use',
                 u'problem says', u'redid working', u'stopped tabmixplus',
                 u'stopped working', u'use usually'
             ]),
            ('Being partially sighted, I found the features with Windows XP '
             'and IE8 extremely usefu;. I need everything in Arial black bold '
             'text.', [
                 u'extremely usefu', u'features sighted', u'windows xp',
                 u'ie8 xp', u'black bold', u'partially sighted', u'need usefu',
                 u'features windows', u'arial need', u'arial black',
                 u'bold text', u'extremely ie8'
             ]),
        ]

        for text, expected in test_data:
            assert sorted(compute_grams(text)) == sorted(expected)
Example #5
    def extract_document(cls, obj_id, obj=None):
        if obj is None:
            obj = cls.get_model().objects.get(pk=obj_id)

        def empty_to_unknown(text):
            return u'Unknown' if text == u'' else text

        doc = {
            'id': obj.id,
            'prodchan': obj.prodchan,
            'happy': obj.happy,
            'url': obj.url,
            'description': obj.description,
            'user_agent': obj.user_agent,
            'product': obj.product,
            'channel': obj.channel,
            'version': obj.version,
            'browser': obj.browser,
            'browser_version': obj.browser_version,
            'platform': obj.platform,
            'locale': obj.locale,
            'device': obj.device,
            'manufacturer': obj.manufacturer,
            'created': obj.created,
        }

        # We only compute bigrams for English because the analysis
        # uses English stopwords, stemmers, ...
        if obj.locale.startswith(u'en') and obj.description:
            bigrams = compute_grams(obj.description)
            doc['description_bigrams'] = bigrams

        return doc
Example #6
    def extract_doc(cls, resp, with_id=True):
        """Converts a Response to a dict of values

        This can be used with ``ResponseDocType.from_obj()`` to create a
        ``ResponseDocType`` object or it can be used for indexing.

        :arg resp: a Response object
        :arg with_id: whether or not to include the ``_id`` value--include
            it when you're bulk indexing

        :returns: a dict

        """
        doc = {
            'id': resp.id,
            'happy': resp.happy,
            'api': resp.api,
            'url': resp.url,
            'url_domain': resp.url_domain,
            'has_email': bool(resp.user_email),
            'description': resp.description,
            'user_agent': resp.user_agent,
            'product': resp.product,
            'channel': resp.channel,
            'version': resp.version,
            'browser': resp.browser,
            'browser_version': resp.browser_version,
            'platform': resp.platform,
            'locale': resp.locale,
            'country': resp.country,
            'device': resp.device,
            'manufacturer': resp.manufacturer,
            'source': resp.source,
            'campaign': resp.campaign,
            'source_campaign': '::'.join([
                (resp.source or '--'),
                (resp.campaign or '--')
            ]),
            'organic': (not resp.campaign),
            'created': resp.created
        }

        # We only compute bigrams for English because the analysis
        # uses English stopwords, stemmers, ...
        if resp.locale.startswith(u'en') and resp.description:
            doc['description_bigrams'] = compute_grams(resp.description)
        else:
            doc['description_bigrams'] = []

        if with_id:
            doc['_id'] = doc['id']
        return doc
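
The with_id flag in the docstring above is there for bulk indexing: when it is set, the returned dict carries an _id key next to the document fields. Below is a minimal usage sketch under that reading; the import paths, the Elasticsearch client, and the 'feedback' index name are assumptions for illustration, not taken from the source.

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

# Hypothetical imports -- the real module paths are not shown in the source:
# from feedback.models import Response
# from feedback.index import ResponseDocType

client = Elasticsearch()

# Build one flat dict per Response; with_id=True adds the '_id' key.
actions = (
    ResponseDocType.extract_doc(resp, with_id=True)
    for resp in Response.objects.iterator()
)

# The bulk helper treats the '_id' key as action metadata and indexes the
# remaining keys as the document body.
bulk(client, actions, index='feedback')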
Example #7
    def extract_doc(cls, resp, with_id=True):
        """Converts a Response to a dict of values

        This can be used with ``ResponseDocType.from_obj()`` to create a
        ``ResponseDocType`` object or it can be used for indexing.

        :arg resp: a Response object
        :arg with_id: whether or not to include the ``_id`` value--include
            it when you're bulk indexing

        :returns: a dict

        """
        doc = {
            "id": resp.id,
            "happy": resp.happy,
            "api": resp.api,
            "url": resp.url,
            "url_domain": resp.url_domain,
            "has_email": bool(resp.user_email),
            "description": resp.description,
            "user_agent": resp.user_agent,
            "product": resp.product,
            "channel": resp.channel,
            "version": resp.version,
            "browser": resp.browser,
            "browser_version": resp.browser_version,
            "platform": resp.platform,
            "locale": resp.locale,
            "country": resp.country,
            "device": resp.device,
            "manufacturer": resp.manufacturer,
            "source": resp.source,
            "campaign": resp.campaign,
            "source_campaign": "::".join([(resp.source or "--"), (resp.campaign or "--")]),
            "organic": (not resp.campaign),
            "created": resp.created,
        }

        # We only compute bigrams for English because the analysis
        # uses English stopwords, stemmers, ...
        if resp.locale.startswith(u"en") and resp.description:
            doc["description_bigrams"] = compute_grams(resp.description)
        else:
            doc["description_bigrams"] = []

        if with_id:
            doc["_id"] = doc["id"]
        return doc
Example #8
    def extract_document(cls, obj_id, obj=None):
        if obj is None:
            obj = cls.get_model().objects.get(pk=obj_id)

        doc = {
            'id': obj.id,
            'prodchan': obj.prodchan,
            'happy': obj.happy,
            'api': obj.api,
            'url': obj.url,
            'url_domain': obj.url_domain,
            'has_email': bool(obj.user_email),
            'description': obj.description,
            'category': obj.category,
            'description_terms': obj.description,
            'user_agent': obj.user_agent,
            'product': obj.product,
            'channel': obj.channel,
            'version': obj.version,
            'browser': obj.browser,
            'browser_version': obj.browser_version,
            'platform': obj.platform,
            'locale': obj.locale,
            'country': obj.country,
            'device': obj.device,
            'manufacturer': obj.manufacturer,
            'source': obj.source,
            'campaign': obj.campaign,
            'source_campaign': '::'.join([
                (obj.source or '--'),
                (obj.campaign or '--')
            ]),
            'organic': (not obj.source and not obj.campaign),
            'created': obj.created,
        }

        # We only compute bigrams for English because the analysis
        # uses English stopwords, stemmers, ...
        if obj.locale.startswith(u'en') and obj.description:
            bigrams = compute_grams(obj.description)
            doc['description_bigrams'] = bigrams

        return doc
Example #9
    def extract_document(cls, obj_id, obj=None):
        if obj is None:
            obj = cls.get_model().objects.get(pk=obj_id)

        def empty_to_unknown(text):
            return u"Unknown" if text == u"" else text

        doc = {
            "id": obj.id,
            "prodchan": obj.prodchan,
            "happy": obj.happy,
            "url": obj.url,
            "url_domain": obj.url_domain,
            "has_email": bool(obj.user_email),
            "description": obj.description,
            "description_terms": obj.description,
            "user_agent": obj.user_agent,
            "product": obj.product,
            "channel": obj.channel,
            "version": obj.version,
            "browser": obj.browser,
            "browser_version": obj.browser_version,
            "platform": obj.platform,
            "locale": obj.locale,
            "country": obj.country,
            "device": obj.device,
            "manufacturer": obj.manufacturer,
            "created": obj.created,
        }

        # We only compute bigrams for English because the analysis
        # uses English stopwords, stemmers, ...
        if obj.locale.startswith(u"en") and obj.description:
            bigrams = compute_grams(obj.description)
            doc["description_bigrams"] = bigrams

        return doc
Example #10
    def test_basic(self):
        test_data = [
            ('The quick brown fox', [u'brown quick', u'brown fox']),

            ('the latest update disables the New tab function',
             [u'disables new', u'function tab', u'new tab', u'latest update',
              u'disables update']),

            ('why is firefox so damn slow???? many tabs load slow or not at '
             'all!',
             [u'load tabs', u'load slow', u'slow tabs', u'damn slow']),

            ("I'm one of the guys that likes to try Firefox ahead of the "
             'herd... usually I use Nightly, but then a while back my '
             'favorite add-on, TabMixPlus stopped working because Firefox '
             "redid something in the code. \"No problem,\" says I to myself, "
             "I'll just use Aurora until they get it fixed.",
             [u'add-on favorite', u'add-on tabmixplus', u'ahead herd',
              u'ahead try', u'aurora fixed', u'aurora use', u'code problem',
              u'code redid', u'favorite nightly', u"guys i'm", u'guys likes',
              u'herd usually', u"i'll just", u"i'll myself", u'just use',
              u'likes try', u'myself says', u'nightly use', u'problem says',
              u'redid working', u'stopped tabmixplus', u'stopped working',
              u'use usually']),

            ('Being partially sighted, I found the features with Windows XP '
             'and IE8 extremely usefu;. I need everything in Arial black bold '
             'text.',
             [u'extremely usefu', u'features sighted', u'windows xp',
              u'ie8 xp', u'black bold', u'partially sighted', u'need usefu',
              u'features windows', u'arial need', u'arial black', u'bold text',
              u'extremely ie8']),
        ]

        for text, expected in test_data:
            assert sorted(compute_grams(text)) == sorted(expected)
Example #11
    def extract_document(cls, obj_id, obj=None):
        if obj is None:
            obj = cls.get_model().objects.get(pk=obj_id)

        doc = {
            'id': obj.id,
            'happy': obj.happy,
            'api': obj.api,
            'url': obj.url,
            'url_domain': obj.url_domain,
            'has_email': bool(obj.user_email),
            'description': obj.description,
            'category': obj.category,
            'description_terms': obj.description,
            'user_agent': obj.user_agent,
            'product': obj.product,
            'channel': obj.channel,
            'version': obj.version,
            'browser': obj.browser,
            'browser_version': obj.browser_version,
            'platform': obj.platform,
            'locale': obj.locale,
            'country': obj.country,
            'device': obj.device,
            'manufacturer': obj.manufacturer,
            'source': obj.source,
            'campaign': obj.campaign,
            'source_campaign': '::'.join([
                (obj.source or '--'),
                (obj.campaign or '--')
            ]),
            'organic': (not obj.campaign),
            'created': obj.created,
        }

        # We only compute bigrams for English because the analysis
        # uses English stopwords, stemmers, ...
        if obj.locale.startswith(u'en') and obj.description:
            bigrams = compute_grams(obj.description)
            doc['description_bigrams'] = bigrams

        return doc
Example #12
    def test_empty(self):
        eq_(compute_grams(u''), [])
Example #13
    def extract_doc(cls, resp, with_id=True):
        """Converts a Response to a dict of values

        This can be used with ``ResponseDocType.from_obj()`` to create a
        ``ResponseDocType`` object or it can be used for indexing.

        :arg resp: a Response object
        :arg with_id: whether or not to include the ``_id`` value--include
            it when you're bulk indexing

        :returns: a dict

        """
        doc = {
            'id': resp.id,
            'happy': resp.happy,
            'api': resp.api,
            'url': resp.url,
            'url_domain': resp.url_domain,
            'has_email': bool(resp.user_email),
            'description': resp.description,
            'user_agent': resp.user_agent,
            'product': resp.product,
            'channel': resp.channel,
            'version': resp.version,
            'browser': resp.browser,
            'browser_version': resp.browser_version,
            'platform': resp.platform,
            'locale': resp.locale,
            'country': resp.country,
            'device': resp.device,
            'manufacturer': resp.manufacturer,
            'source': resp.source,
            'campaign': resp.campaign,
            'source_campaign': '::'.join([
                (resp.source or '--'),
                (resp.campaign or '--')
            ]),
            'organic': (not resp.campaign),
            'created': resp.created
        }

        # We only compute bigrams for English because the analysis
        # uses English stopwords, stemmers, ...
        if resp.locale.startswith(u'en') and resp.description:
            doc['description_bigrams'] = compute_grams(resp.description)
        else:
            doc['description_bigrams'] = []

        if with_id:
            doc['_id'] = doc['id']
        return doc
Example #14
    def test_empty(self):
        assert compute_grams(u'') == []