Example #1
0
    def test_smoke(self):
        """Run the weighted positive words job end to end on mock input."""
        # Three mock businesses sharing one category, one review for each.
        names = ("Yelp", "Target", "Walmart")
        businesses = [BUSINESS_TEMPLATE % (CATEGORY, name) for name in names]
        reviews = [REVIEW_TEMPLATE % (TEXT, name) for name in names]

        # The first review is repeated 20 times so that enough occurrences
        # are fed in (the original note asks for at least 50).
        total_input = (
            businesses[0] + businesses[1] + businesses[2]
            + reviews[0] * 20 + reviews[1] + reviews[2]
        )

        job = WeightedPositiveWords(['-r', 'inline', '--no-conf', '-'])
        job.sandbox(stdin=StringIO(total_input))

        with job.make_runner() as runner:
            runner.run()
            # Only the value half of each parsed (key, value) pair is kept.
            results = [
                value
                for _key, value in (
                    job.parse_output_line(line)
                    for line in runner.stream_output()
                )
            ]

        end_result = [[CATEGORY, 66.0, 'hello'], [CATEGORY, 66.0, 'world']]
        self.assertEqual(results, end_result)
Example #2
0
    def test_review_mapper(self):
        """Feed review_mapper one mock input and check what it emits."""
        mapper_input = (BIZ_NAME, (TEXT, 3))
        expected = [
            ((CATEGORY, u'world'), (BIZ_NAME, 3)),
            ((CATEGORY, u'hello'), (BIZ_NAME, 3)),
        ]

        mapper = WeightedPositiveWords().review_mapper
        emitted = list(mapper(CATEGORY, mapper_input))
        # Compared with assert_sorted_equal rather than plain equality.
        T.assert_sorted_equal(emitted, expected)
Example #3
0
    def test_category_join(self):
        """Check category_join_reducer in isolation on a mock input.

        Testing each function of the job separately helps pinpoint which
        one broke when somebody changes the job.
        """
        joined_values = (('review', (TEXT, 3)), ('categories', [CATEGORY]))
        expected = [(CATEGORY, (BIZ_NAME, (TEXT, 3)))]

        reducer = WeightedPositiveWords().category_join_reducer
        actual = list(reducer(BIZ_NAME, joined_values))
        self.assertEqual(actual, expected)
Example #4
0
    def test_review_mapper(self):
        """Feed review_mapper one mock input and check what it emits."""
        mapper_input = (BIZ_NAME, (TEXT, 3))
        expected = [
            ((CATEGORY, u'world'), (BIZ_NAME, 3)),
            ((CATEGORY, u'hello'), (BIZ_NAME, 3)),
        ]

        mapper = WeightedPositiveWords().review_mapper
        emitted = list(mapper(CATEGORY, mapper_input))
        # Compared with assert_sorted_equal rather than plain equality.
        T.assert_sorted_equal(emitted, expected)
Example #5
0
    def test_category_join(self):
        """Check category_join_reducer in isolation on a mock input.

        Testing each function of the job separately helps pinpoint which
        one broke when somebody changes the job.
        """
        joined_values = (('review', (TEXT, 3)), ('categories', [CATEGORY]))
        expected = [(CATEGORY, (BIZ_NAME, (TEXT, 3)))]

        reducer = WeightedPositiveWords().category_join_reducer
        actual = list(reducer(BIZ_NAME, joined_values))
        self.assertEqual(actual, expected)
Example #6
0
    def test_review_category(self):
        """Check review_category_mapper on a mock review and mock business."""
        review_json = REVIEW_TEMPLATE % (TEXT, BIZ_NAME)
        business_json = BUSINESS_TEMPLATE % (CATEGORY, BIZ_NAME)

        mapper = WeightedPositiveWords().review_category_mapper
        # The mapper is called with a None key and the decoded JSON record.
        from_review = list(mapper(None, json.loads(review_json)))
        from_business = list(mapper(None, json.loads(business_json)))

        expected_from_review = [(BIZ_NAME, ('review', (TEXT, 3)))]
        expected_from_business = [(BIZ_NAME, ('categories', [CATEGORY]))]
        self.assertEqual(from_review, expected_from_review)
        self.assertEqual(from_business, expected_from_business)
Example #7
0
    def test_review_category(self):
        """Check review_category_mapper on a mock review and mock business."""
        review_json = REVIEW_TEMPLATE % (TEXT, BIZ_NAME)
        business_json = BUSINESS_TEMPLATE % (CATEGORY, BIZ_NAME)

        mapper = WeightedPositiveWords().review_category_mapper
        # The mapper is called with a None key and the decoded JSON record.
        from_review = list(mapper(None, json.loads(review_json)))
        from_business = list(mapper(None, json.loads(business_json)))

        expected_from_review = [(BIZ_NAME, ('review', (TEXT, 3)))]
        expected_from_business = [(BIZ_NAME, ('categories', [CATEGORY]))]
        self.assertEqual(from_review, expected_from_review)
        self.assertEqual(from_business, expected_from_business)
Example #8
0
    def test_smoke(self):
        """Run the weighted positive words job end to end on mock input."""
        # Three mock businesses sharing one category, one review for each.
        names = ("Yelp", "Target", "Walmart")
        businesses = [BUSINESS_TEMPLATE % (CATEGORY, name) for name in names]
        reviews = [REVIEW_TEMPLATE % (TEXT, name) for name in names]

        # The first review is repeated 20 times so that enough occurrences
        # are fed in (the original note asks for at least 50).
        total_input = (
            businesses[0] + businesses[1] + businesses[2]
            + reviews[0] * 20 + reviews[1] + reviews[2]
        )

        job = WeightedPositiveWords(['-r', 'inline', '--no-conf', '-'])
        job.sandbox(stdin=StringIO(total_input))

        with job.make_runner() as runner:
            runner.run()
            # Only the value half of each parsed (key, value) pair is kept.
            results = [
                value
                for _key, value in (
                    job.parse_output_line(line)
                    for line in runner.stream_output()
                )
            ]

        end_result = [[CATEGORY, 66.0, 'hello'], [CATEGORY, 66.0, 'world']]
        self.assertEqual(results, end_result)
Example #9
0
        avg, total = avg_and_total(positivities)

        if total < MINIMUM_OCCURENCES:
            return

        yield int(avg * 100), (category, total, word)

    def steps(self):
        """Return the job's two steps, in order, as built by ``self.mr``.

        The first step pairs ``review_category_mapper`` with
        ``category_join_reducer``; the second pairs ``review_mapper`` with
        ``positivity_reducer``.
        """
        first_step = self.mr(self.review_category_mapper,
                             self.category_join_reducer)
        second_step = self.mr(self.review_mapper, self.positivity_reducer)
        return [first_step, second_step]


if __name__ == "__main__":
    # Script entry point. Assuming WeightedPositiveWords is an mrjob MRJob
    # subclass (TODO confirm), run() is a classmethod that builds its own
    # job instance from the command line, so it is called on the class
    # rather than on a throwaway instance.
    WeightedPositiveWords.run()

########NEW FILE########
__FILENAME__ = autopilot
# Copyright 2011 Yelp and Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and