Exemple #1
0
    def test_merge_differing_capitalization(self):
        # Tests #19: two scraped spellings of a brand that differ only in
        # letter case, under the same company and scraper.
        company = 'Newell Rubbermaid'
        scraper_id = 'sr.campaign.hrc'
        spellings = ('CardScan', 'Cardscan')

        scraped_brands = [
            {'brand': spelling, 'company': company, 'scraper_id': scraper_id}
            for spelling in spellings
        ]
        insert_rows(self.scratch_db, 'brand', scraped_brands)

        company_mapping = {
            'company': company,
            'scraper_company': company,
            'scraper_id': scraper_id,
        }
        insert_row(self.output_db, 'scraper_company_map', company_mapping)

        build_scraper_brand_map_table(self.output_db, self.scratch_db)

        actual = select_all(self.output_db, 'scraper_brand_map')
        # Each scraped spelling keeps its own row, but both rows are
        # expected to carry the single output brand 'CardScan'.
        expected = [
            {
                'brand': 'CardScan',
                'company': company,
                'scraper_brand': spelling,
                'scraper_company': company,
                'scraper_id': scraper_id,
            }
            for spelling in spellings
        ]
        self.assertEqual(actual, expected)
Exemple #2
0
    def test_dont_push_brand_to_unrelated_subsidiary(self):
        # Tests #59: a parent company's brand must stay with the parent
        # even though the parent has a subsidiary on record.
        parent = 'Unilever'
        scraper_id = 'campaign.hrc'

        scraped_brands = [
            {'brand': 'Dove', 'company': parent, 'scraper_id': scraper_id},
        ]
        insert_rows(self.scratch_db, 'brand', scraped_brands)

        company_mappings = [
            {
                'company': parent,
                'scraper_company': parent,
                'scraper_id': scraper_id,
            },
        ]
        insert_rows(self.output_db, 'scraper_company_map', company_mappings)

        subsidiaries = [
            {
                'company': parent,
                'company_depth': 0,
                'subsidiary': "Ben & Jerry's",
                'subsidiary_depth': 1,
            },
        ]
        insert_rows(self.output_db, 'subsidiary', subsidiaries)

        build_scraper_brand_map_table(self.output_db, self.scratch_db)

        actual = select_all(self.output_db, 'scraper_brand_map')
        # Only the Dove -> Unilever row is expected; nothing should be
        # attributed to the subsidiary.
        expected = [
            {
                'brand': 'Dove',
                'company': parent,
                'scraper_brand': 'Dove',
                'scraper_company': parent,
                'scraper_id': scraper_id,
            },
        ]
        self.assertEqual(actual, expected)
Exemple #3
0
    def test_match_brand_to_subsidiary_name(self):
        # A brand whose name equals a subsidiary of the scraped company
        # is expected to be assigned to that subsidiary.
        scraper_id = 'campaign.rankabrand'

        scraped_brands = [
            {'brand': 'Puma', 'company': 'Kering SA', 'scraper_id': scraper_id},
        ]
        insert_rows(self.scratch_db, 'brand', scraped_brands)

        company_mappings = [
            {
                'company': 'Kering',
                'scraper_company': 'Kering SA',
                'scraper_id': scraper_id,
            },
        ]
        insert_rows(self.output_db, 'scraper_company_map', company_mappings)

        subsidiaries = [
            {
                'company': 'Kering',
                'company_depth': 0,
                'subsidiary': 'Puma',
                'subsidiary_depth': 1,
            },
        ]
        insert_rows(self.output_db, 'subsidiary', subsidiaries)

        build_scraper_brand_map_table(self.output_db, self.scratch_db)

        actual = select_all(self.output_db, 'scraper_brand_map')
        # The output company is the subsidiary 'Puma', while the scraper
        # columns still record what was scraped ('Kering SA').
        expected = [
            {
                'brand': 'Puma',
                'company': 'Puma',
                'scraper_brand': 'Puma',
                'scraper_company': 'Kering SA',
                'scraper_id': scraper_id,
            },
        ]
        self.assertEqual(actual, expected)
Exemple #4
0
    def test_dump_empty_brand(self):
        # A scraped brand consisting solely of a trademark sign is
        # expected to produce no rows in scraper_brand_map.
        company = 'Voidcorp'
        scraper_id = 's'

        scraped_brands = [
            {'brand': '™', 'company': company, 'scraper_id': scraper_id},
        ]
        insert_rows(self.scratch_db, 'brand', scraped_brands)

        company_mappings = [
            {
                'company': company,
                'scraper_company': company,
                'scraper_id': scraper_id,
            },
        ]
        insert_rows(self.output_db, 'scraper_company_map', company_mappings)

        build_scraper_brand_map_table(self.output_db, self.scratch_db)

        actual = select_all(self.output_db, 'scraper_brand_map')
        self.assertEqual(actual, [])
Exemple #5
0
    def test_prefer_subsidiary_for_brand(self):
        # Tests #16: the same brand is scraped once under the subsidiary
        # ('Puma') and once under the parent ('Kering SA').
        scraped_brands = [
            {
                'brand': 'Puma',
                'company': 'Puma',
                'scraper_id': 'campaign.btb_fashion',
            },
            {
                'brand': 'Puma',
                'company': 'Kering SA',
                'scraper_id': 'campaign.rankabrand',
            },
        ]
        insert_rows(self.scratch_db, 'brand', scraped_brands)

        company_mappings = [
            {
                'company': 'Puma',
                'scraper_company': 'Puma',
                'scraper_id': 'campaign.btb_fashion',
            },
            {
                'company': 'Kering',
                'scraper_company': 'Kering SA',
                'scraper_id': 'campaign.rankabrand',
            },
        ]
        insert_rows(self.output_db, 'scraper_company_map', company_mappings)

        subsidiaries = [
            {
                'company': 'Kering',
                'company_depth': 0,
                'subsidiary': 'Puma',
                'subsidiary_depth': 1,
            },
        ]
        insert_rows(self.output_db, 'subsidiary', subsidiaries)

        build_scraper_brand_map_table(self.output_db, self.scratch_db)

        actual = select_all(self.output_db, 'scraper_brand_map')
        # Both scraped rows are expected to resolve to company 'Puma',
        # including the one scraped under the parent.
        expected = [
            {
                'brand': 'Puma',
                'company': 'Puma',
                'scraper_brand': 'Puma',
                'scraper_company': scraper_company,
                'scraper_id': scraper_id,
            }
            for scraper_company, scraper_id in (
                ('Kering SA', 'campaign.rankabrand'),
                ('Puma', 'campaign.btb_fashion'),
            )
        ]
        self.assertEqual(actual, expected)
Exemple #6
0
    def test_match_canonical_company_name_only(self):
        # Tests #40: brand 'Asus' is scraped under two differently named
        # scraper companies that share one canonical company.
        scraped = (
            ('Asus', 'campaign.btb_electronics'),
            ('ASUSTeK Computer Incorporated', 'campaign.rankabrand'),
        )

        scraped_brands = [
            {'brand': 'Asus', 'company': company, 'scraper_id': scraper_id}
            for company, scraper_id in scraped
        ]
        insert_rows(self.scratch_db, 'brand', scraped_brands)

        # ASUS was picked as the canonical name based on company_name.
        company_mappings = [
            {
                'company': 'ASUS',
                'scraper_id': scraper_id,
                'scraper_company': company,
            }
            for company, scraper_id in scraped
        ]
        insert_rows(self.output_db, 'scraper_company_map', company_mappings)

        build_scraper_brand_map_table(self.output_db, self.scratch_db)

        actual = select_all(self.output_db, 'scraper_brand_map')
        # Both rows are expected to use the canonical 'ASUS' as brand and
        # company; the rankabrand row comes first in the expected output.
        expected = [
            {
                'brand': 'ASUS',
                'company': 'ASUS',
                'scraper_brand': 'Asus',
                'scraper_company': company,
                'scraper_id': scraper_id,
            }
            for company, scraper_id in reversed(scraped)
        ]
        self.assertEqual(actual, expected)
Exemple #7
0
    def test_merge_hyphens(self):
        # Tests #31: one brand scraped with and without a hyphen by two
        # different scrapers for the same company.
        company = 'Clorox'
        scraped = (
            ('Liquid Plumr', 'company.clorox'),
            ('Liquid-Plumr', 'campaign.hrc'),
        )

        scraped_brands = [
            {'brand': spelling, 'company': company, 'scraper_id': scraper_id}
            for spelling, scraper_id in scraped
        ]
        insert_rows(self.scratch_db, 'brand', scraped_brands)

        company_mappings = [
            {
                'company': company,
                'scraper_id': scraper_id,
                'scraper_company': company,
            }
            for _, scraper_id in scraped
        ]
        insert_rows(self.output_db, 'scraper_company_map', company_mappings)

        build_scraper_brand_map_table(self.output_db, self.scratch_db)

        actual = select_all(self.output_db, 'scraper_brand_map')
        # Both spellings are expected under the hyphenated brand
        # 'Liquid-Plumr', each keeping its own scraper_brand.
        expected = [
            {
                'brand': 'Liquid-Plumr',
                'company': company,
                'scraper_brand': spelling,
                'scraper_company': company,
                'scraper_id': scraper_id,
            }
            for spelling, scraper_id in scraped
        ]
        self.assertEqual(actual, expected)