def test_solr_rm_feedfile(self):
        """
        Test that at the end of Solr parsing, the feed file is deleted.

        """
        update_solr(self.buid_id)
        self.assertFalse(os.access(self.filepath, os.F_OK))
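
Note: os.access(path, os.F_OK) is a plain existence check, so the assertion
is equivalent to assertFalse(os.path.exists(self.filepath)) and verifies
that the parser cleaned up the downloaded feed file.
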
Example #2
def task_update_solr(jsid, **kwargs):
    try:
        import_jobs.update_solr(jsid, **kwargs)
        if kwargs.get('clear_cache', False):
            task_clear_bu_cache.delay(buid=int(jsid), countdown=1500)
            ImportRecord(buid=int(jsid), success=True).save()
    except Exception as e:
        # Log the full traceback of the exception currently being handled.
        logging.error(traceback.format_exc())
        ImportRecord(buid=int(jsid), success=False).save()
        raise task_update_solr.retry(exc=e)
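
The retry() call above only exists on a registered Celery task, so in context
this function carries a task decorator that the snippet omits. A minimal
sketch of the declaration, assuming Celery's old-style decorator (the retry
settings are illustrative, not taken from the source):

from celery.task import task

@task(default_retry_delay=600, max_retries=2)  # settings assumed, not from the source
def task_update_solr(jsid, **kwargs):
    ...
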
Example #3
    def test_empty_solr(self):
        """
        Tests for the proper behavior when encountering a job-less, but
        otherwise valid, feed file. The proper behavior is to delete any
        jobs associated with that BusinessUnit from the Solr index.

        """
        # Normal download-and-parse operation on a feed file with jobs.
        update_solr(self.buid_id)
        results = self.conn.search(q="*:*", fq="buid:%s" % self.buid_id)
        self.assertEqual(results.hits, self.numjobs)

        # Download-and-parse operation on a feed file with no jobs. Expected
        # behavior is to delete all jobs.
        self._get_feedfile()
        update_solr(self.buid_id, download=False)
        results = self.conn.search(q="*:*", fq="buid:%s" % self.buid_id)
        self.assertEqual(results.hits, 0)
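
Note: _get_feedfile() evidently stages the job-less feed fixture on disk, and
download=False makes update_solr parse that staged file instead of re-fetching
the feed; the helper itself belongs to the test class and is not shown here.
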
Example #4
    def test_set_bu_title(self):
        """
        Ensure that if a feedfile for a BusinessUnit comes through, and
        the `title` attribute for that BusinessUnit is not set, that
        `helpers.update_solr` sets the `title` attribute properly.

        """
        bu = BusinessUnit.objects.get(id=self.buid_id)
        bu.title = None
        bu.save()
        # Since the BusinessUnit title is None, the intent is that update_solr
        # will set its title to match the company name found in the feed file.
        results = update_solr(self.buid_id)
        # We have to get the updated state of the BusinessUnit instance, since
        # changes to the database won't be reflected by our in-memory version of
        # the data.
        bu = BusinessUnit.objects.get(id=self.buid_id)
        # The title attribute should now equal the initial value established in
        # the setUp method.
        self.assertEqual(self.businessunit.title, bu.title)
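
On Django 1.8 or later, the re-fetch can be written as an in-place refresh,
which avoids rebinding the name:

        bu.refresh_from_db()
        self.assertEqual(self.businessunit.title, bu.title)
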
Example #5
    def test_unicode_title(self):
        # Run the job import and verify the site-wide job count against Solr.
        group = factories.GroupFactory()
        self.site.group = group
        self.site.business_units.add(self.businessunit)
        self.site.save()
        import_jobs.update_solr(self.buid, download=False, delete_feed=False,
                                data_dir='seo/tests/data/')
        solr_jobs = self.conn.search("*:*")
        resp = self.client.get('/')
        self.assertEqual(resp.context['total_jobs_count'], solr_jobs.hits)

        # test standard facets against Haystack query
        standard_cf = factories.CustomFacetFactory.build(
            # default facet will return both jobs
            name="Keyword Facet",
            group=group,
            show_production=True)
        standard_cf.save()
        standard_cf.keyword.add(u'Ключевые')
        standard_cf.save()
        standard_site_facet = factories.SeoSiteFacetFactory(
            seosite=self.site,
            customfacet=standard_cf,
            facet_type=factories.SeoSiteFacet.STANDARD)
        standard_site_facet.save()

        # A second standard facet, this one filtering on country.
        standard_cf2 = factories.CustomFacetFactory.build(
            # default facet will return both jobs
            name='Country Facet',
            country='United States',
            group=group,
            show_production=True)
        standard_cf2.save()
        standard_site_facet2 = factories.SeoSiteFacetFactory(
            seosite=self.site,
            customfacet=standard_cf2,
            facet_type=factories.SeoSiteFacet.STANDARD)
        standard_site_facet2.save()

        resp = self.client.get('/keyword-facet/new-jobs/',
                               HTTP_HOST=self.site.domain, follow=True)
        sqs = DESearchQuerySet().filter(text=u'Ключевые')
        self.assertEqual(len(resp.context['default_jobs']), sqs.count())
        for facet_widget in resp.context['widgets']:
            # Ensure that no standard facet has more results than current
            # search results
            for count_tuple in facet_widget.items:
                self.assertTrue(sqs.count() >= count_tuple[1])
        
        # Test default site facets against PySolr query
        from django.core.cache import cache
        cache.clear()
        default_cf = factories.CustomFacetFactory.build(
            name="Default Facet",
            title=u"Специалист",
            group=group,
            show_production=True)
        default_cf.save()
        default_site_facet = factories.SeoSiteFacetFactory(
            seosite=self.site,
            facet_type=factories.SeoSiteFacet.DEFAULT,
            customfacet=default_cf)
        default_site_facet.save()
        resp = self.client.get('/jobs/', HTTP_HOST=self.site.domain,
                               follow=True)
        total_jobs = resp.context['total_jobs_count']
        solr_jobs = self.conn.search(q=u"title:Специалист")
        self.assertEqual(total_jobs, solr_jobs.hits)
        self.assertEqual(len(resp.context['default_jobs']), total_jobs)
        for facet_widget in resp.context['widgets']:
            for count_tuple in facet_widget.items:
                self.assertTrue(sqs.count() >= count_tuple[1])

        # Feed test
        resp = self.client.get('/feed/json', HTTP_HOST=self.site.domain)
        jobs = json.loads(resp.content)
        self.assertEqual(len(jobs), total_jobs)
        for job in jobs:
            resp = self.client.get(job['url'], HTTP_HOST=self.site.domain,
                                   follow=False)
            self.assertEqual(resp.status_code, 302)
            expected = 'http://my.jobs/%s%d?my.jobs.site.id=%s' %\
                       (job['guid'],
                        settings.FEED_VIEW_SOURCES['json'],
                        str(self.site.pk))
            self.assertEqual(resp['Location'], expected)

        # Sitemap index Test - Since sitemap only builds out updates from the
        # last 30 days, this test will eventually be checking 0 jobs in sitemap
        # TODO, find a way to keep feed dates current. We might be able to use
        # the mock library to override datetime functions
        resp = self.client.get('/sitemap.xml', HTTP_HOST=self.site.domain)  
        root = etree.fromstring(resp.content)
        self.assertGreater(len(root), 0)
        crawled_jobs = 0
        for loc, lastmod in root:
            self.assertTrue(loc.text)
            resp = self.client.get(loc.text, HTTP_HOST=self.site.domain)  
            self.assertEqual(resp.status_code, 200)
            # Get the first daily sitemap
            urlset = etree.fromstring(resp.content)
            # Fetch each job URL in the daily sitemap, as a crawler would.
            for loc, _, _, _ in urlset:
                resp = self.client.get(loc.text, HTTP_HOST=self.site.domain)
                self.assertEqual(resp.status_code, 200)
                self.assertIn(str(resp.context['the_job'].uid), loc.text)
                crawled_jobs += 1
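
One way to tackle the TODO above is to patch the clock the sitemap code reads
so that feed dates stay inside its 30-day window. A minimal sketch using the
mock library; the patch target 'seo.sitemap.datetime' is a guess at the module
under test, not taken from the source:

import datetime
import mock

fixed_now = datetime.datetime(2013, 6, 1)
# Replace the datetime class seen by the sitemap module (assumed path).
with mock.patch('seo.sitemap.datetime') as mock_dt:
    mock_dt.now.return_value = fixed_now
    # Keep ordinary constructor calls working on the patched class.
    mock_dt.side_effect = lambda *a, **kw: datetime.datetime(*a, **kw)
    resp = self.client.get('/sitemap.xml', HTTP_HOST=self.site.domain)
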
Example #6
def task_update_solr(jsid, **kwargs):
    import_jobs.update_solr(jsid, **kwargs)
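
This is the bare variant of the wrapper: no retry and no ImportRecord
bookkeeping. Dispatch is the same for every variant; assuming standard Celery
usage, a caller queues it with:

task_update_solr.delay(buid, clear_cache=True)
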
Example #7
def task_update_solr(jsid, **kwargs):
    try:
        import_jobs.update_solr(jsid, **kwargs)
    except Exception as e:
        # Log the full traceback of the exception currently being handled.
        logging.error(traceback.format_exc())
        raise task_update_solr.retry(exc=e)
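
Note: Celery's retry() raises a Retry exception itself in order to re-queue
the task; the explicit raise is the documented idiom that keeps the except
block from falling through in case retry() ever returns.
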
Example #8
    def test_unicode_title(self):
        # Run the job import and verify the site-wide job count against Solr.
        group = factories.GroupFactory()
        self.site.group = group
        self.site.business_units.add(self.businessunit)
        self.site.save()
        import_jobs.update_solr(self.buid,
                                download=False,
                                delete_feed=False,
                                data_dir='seo/tests/data/')
        solr_jobs = self.conn.search("*:*")
        resp = self.client.get('/')
        self.assertEqual(resp.context['total_jobs_count'], solr_jobs.hits)

        # test standard facets against Haystack query
        standard_cf = factories.CustomFacetFactory.build(
            # default facet will return both jobs
            name="Keyword Facet",
            group=group,
            show_production=True)
        standard_cf.save()
        standard_cf.keyword.add(u'Ключевые')
        standard_cf.save()
        standard_site_facet = factories.SeoSiteFacetFactory(
            seosite=self.site,
            customfacet=standard_cf,
            facet_type=factories.SeoSiteFacet.STANDARD)
        standard_site_facet.save()

        # A second standard facet, this one filtering on country.
        standard_cf2 = factories.CustomFacetFactory.build(
            # default facet will return both jobs
            name='Country Facet',
            country='United States',
            group=group,
            show_production=True)
        standard_cf2.save()
        standard_site_facet2 = factories.SeoSiteFacetFactory(
            seosite=self.site,
            customfacet=standard_cf2,
            facet_type=factories.SeoSiteFacet.STANDARD)
        standard_site_facet2.save()

        resp = self.client.get('/keyword-facet/new-jobs/',
                               HTTP_HOST=self.site.domain,
                               follow=True)
        sqs = DESearchQuerySet().filter(text=u'Ключевые')
        self.assertEqual(len(resp.context['default_jobs']), sqs.count())
        for facet_widget in resp.context['widgets']:
            # Ensure that no standard facet has more results than current
            # search results
            for count_tuple in facet_widget.items:
                self.assertTrue(sqs.count() >= count_tuple[1])

        # Test default site facets against PySolr query
        from django.core.cache import cache
        cache.clear()
        default_cf = factories.CustomFacetFactory.build(name="Default Facet",
                                                        title=u"Специалист",
                                                        group=group,
                                                        show_production=True)
        default_cf.save()
        default_site_facet = factories.SeoSiteFacetFactory(
            seosite=self.site,
            facet_type=factories.SeoSiteFacet.DEFAULT,
            customfacet=default_cf)
        default_site_facet.save()
        resp = self.client.get('/jobs/',
                               HTTP_HOST=self.site.domain,
                               follow=True)
        total_jobs = resp.context['total_jobs_count']
        solr_jobs = self.conn.search(q=u"title:Специалист")
        self.assertEqual(total_jobs, solr_jobs.hits)
        self.assertEqual(len(resp.context['default_jobs']), total_jobs)
        for facet_widget in resp.context['widgets']:
            for count_tuple in facet_widget.items:
                self.assertTrue(sqs.count() >= count_tuple[1])

        # Feed test
        resp = self.client.get('/feed/json', HTTP_HOST=self.site.domain)
        jobs = json.loads(resp.content)
        self.assertEqual(len(jobs), total_jobs)
        for job in jobs:
            resp = self.client.get(job['url'],
                                   HTTP_HOST=self.site.domain,
                                   follow=False)
            self.assertEqual(resp.status_code, 302)
            expected = 'https://my.jobs/%s%d?my.jobs.site.id=%s' %\
                       (job['guid'],
                        settings.FEED_VIEW_SOURCES['json'],
                        str(self.site.pk))
            self.assertEqual(resp['Location'], expected)

        # Sitemap index Test - Since sitemap only builds out updates from the
        # last 30 days, this test will eventually be checking 0 jobs in sitemap
        # TODO, find a way to keep feed dates current. We might be able to use
        # the mock library to override datetime functions
        resp = self.client.get('/sitemap.xml', HTTP_HOST=self.site.domain)
        root = etree.fromstring(resp.content)
        self.assertGreater(len(root), 0)
        crawled_jobs = 0
        for loc, lastmod in root:
            self.assertTrue(loc.text)
            resp = self.client.get(loc.text, HTTP_HOST=self.site.domain)
            self.assertEqual(resp.status_code, 200)
            # Get the first daily sitemap
            urlset = etree.fromstring(resp.content)
            # Fetch each job URL in the daily sitemap, as a crawler would.
            for loc, _, _, _ in urlset:
                resp = self.client.get(loc.text, HTTP_HOST=self.site.domain)
                self.assertEqual(resp.status_code, 200)
                self.assertIn(str(resp.context['the_job'].uid), loc.text)
                crawled_jobs += 1