def test_solr_rm_feedfile(self):
    """
    Test that at the end of Solr parsing, the feed file is deleted.
    """
    update_solr(self.buid_id)
    self.assertFalse(os.access(self.filepath, os.F_OK))
def task_update_solr(jsid, **kwargs):
    try:
        import_jobs.update_solr(jsid, **kwargs)
        if kwargs.get('clear_cache', False):
            # `countdown` is an execution option, so it must go through
            # apply_async; delay() would forward it to the task as an
            # ordinary keyword argument.
            task_clear_bu_cache.apply_async(kwargs={'buid': int(jsid)},
                                            countdown=1500)
        ImportRecord(buid=int(jsid), success=True).save()
    except Exception as e:
        # format_exc() reads the active exception itself; it does not
        # take the sys.exc_info() tuple.
        logging.error(traceback.format_exc())
        ImportRecord(buid=int(jsid), success=False).save()
        raise task_update_solr.retry(exc=e)
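# A minimal sketch of how this task might be registered and queued with
# Celery. The classic decorator import and the retry settings shown here
# are assumptions for illustration, not taken from the source.
from celery.task import task

@task(max_retries=3, default_retry_delay=600)
def task_update_solr(jsid, **kwargs):
    # Simplified body; see the full version above.
    import_jobs.update_solr(jsid, **kwargs)

# Re-index business unit 12345, then clear its cached pages afterward.
task_update_solr.delay('12345', clear_cache=True)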
def test_empty_solr(self):
    """
    Tests for the proper behavior when encountering a job-less, but
    otherwise valid, feed file. The proper behavior is to delete any
    jobs associated with that BusinessUnit from the Solr index.
    """
    # Normal download-and-parse operation on a feed file with jobs.
    update_solr(self.buid_id)
    results = self.conn.search(q="*:*", fq="buid:%s" % self.buid_id)
    self.assertEqual(results.hits, self.numjobs)

    # Download-and-parse operation on a feed file with no jobs. Expected
    # behavior is to delete all jobs.
    self._get_feedfile()
    update_solr(self.buid_id, download=False)
    results = self.conn.search(q="*:*", fq="buid:%s" % self.buid_id)
    self.assertEqual(results.hits, 0)
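# The deletion this test exercises happens inside update_solr. As a rough
# illustration, assuming a pysolr connection, removing a BusinessUnit's
# jobs amounts to a delete-by-query. This is a sketch, not the actual
# implementation; the Solr URL is a placeholder.
import pysolr

solr = pysolr.Solr('http://localhost:8983/solr')

def clear_buid_jobs(buid):
    # Remove every document indexed for this business unit and commit.
    solr.delete(q='buid:%s' % buid, commit=True)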
def test_set_bu_title(self):
    """
    Ensure that if a feed file for a BusinessUnit comes through and the
    `title` attribute for that BusinessUnit is not set,
    `helpers.update_solr` sets the `title` attribute properly.
    """
    bu = BusinessUnit.objects.get(id=self.buid_id)
    bu.title = None
    bu.save()
    # Since the BusinessUnit title is None, update_solr should set its
    # title to match the company name found in the feed file.
    update_solr(self.buid_id)
    # Refresh the BusinessUnit instance, since changes made at the
    # database level aren't reflected by our in-memory version of the
    # data.
    bu = BusinessUnit.objects.get(id=self.buid_id)
    # The title attribute should now equal the initial value established
    # in the setUp method.
    self.assertEqual(self.businessunit.title, bu.title)
def test_unicode_title(self):
    # Test imports
    group = factories.GroupFactory()
    self.site.group = group
    self.site.business_units.add(self.businessunit)
    self.site.save()

    import_jobs.update_solr(self.buid, download=False, delete_feed=False,
                            data_dir='seo/tests/data/')
    solr_jobs = self.conn.search("*:*")
    resp = self.client.get('/')
    self.assertEqual(resp.context['total_jobs_count'], solr_jobs.hits)

    # Test standard facets against a Haystack query.
    standard_cf = factories.CustomFacetFactory.build(
        # default facet will return both jobs
        name="Keyword Facet",
        group=group,
        show_production=True)
    standard_cf.save()
    standard_cf.keyword.add(u'Ключевые')
    standard_cf.save()
    standard_site_facet = factories.SeoSiteFacetFactory(
        seosite=self.site,
        customfacet=standard_cf,
        facet_type=factories.SeoSiteFacet.STANDARD)
    standard_site_facet.save()

    standard_cf2 = factories.CustomFacetFactory.build(
        name='Country Facet',
        country='United States',
        group=group,
        show_production=True)
    standard_cf2.save()
    standard_site_facet2 = factories.SeoSiteFacetFactory(
        seosite=self.site,
        customfacet=standard_cf2,
        facet_type=factories.SeoSiteFacet.STANDARD)
    standard_site_facet2.save()

    resp = self.client.get('/keyword-facet/new-jobs/',
                           HTTP_HOST=self.site.domain, follow=True)
    sqs = DESearchQuerySet().filter(text=u'Ключевые')
    self.assertEqual(len(resp.context['default_jobs']), sqs.count())
    for facet_widget in resp.context['widgets']:
        # Ensure that no standard facet has more results than the
        # current search results.
        for count_tuple in facet_widget.items:
            self.assertTrue(sqs.count() >= count_tuple[1])

    # Test default site facets against a PySolr query.
    from django.core.cache import cache
    cache.clear()
    default_cf = factories.CustomFacetFactory.build(
        name="Default Facet",
        title=u"Специалист",
        group=group,
        show_production=True)
    default_cf.save()
    default_site_facet = factories.SeoSiteFacetFactory(
        seosite=self.site,
        facet_type=factories.SeoSiteFacet.DEFAULT,
        customfacet=default_cf)
    default_site_facet.save()
    resp = self.client.get('/jobs/', HTTP_HOST=self.site.domain,
                           follow=True)
    total_jobs = resp.context['total_jobs_count']
    solr_jobs = self.conn.search(q=u"title:Специалист")
    self.assertEqual(total_jobs, solr_jobs.hits)
    self.assertEqual(len(resp.context['default_jobs']), total_jobs)
    for facet_widget in resp.context['widgets']:
        for count_tuple in facet_widget.items:
            self.assertTrue(sqs.count() >= count_tuple[1])

    # Feed test
    resp = self.client.get('/feed/json', HTTP_HOST=self.site.domain)
    jobs = json.loads(resp.content)
    self.assertEqual(len(jobs), total_jobs)
    for job in jobs:
        resp = self.client.get(job['url'], HTTP_HOST=self.site.domain,
                               follow=False)
        self.assertEqual(resp.status_code, 302)
        expected = 'http://my.jobs/%s%d?my.jobs.site.id=%s' % \
            (job['guid'], settings.FEED_VIEW_SOURCES['json'],
             str(self.site.pk))
        self.assertEqual(resp['Location'], expected)

    # Sitemap index test. Since the sitemap only builds out updates from
    # the last 30 days, this test will eventually be checking 0 jobs in
    # the sitemap.
    # TODO: find a way to keep feed dates current. We might be able to
    # use the mock library to override datetime functions.
    resp = self.client.get('/sitemap.xml', HTTP_HOST=self.site.domain)
    root = etree.fromstring(resp.content)
    self.assertGreater(len(root), 0)
    crawled_jobs = 0
    for loc, lastmod in root:
        self.assertTrue(loc.text)
        # Get the first daily sitemap.
        resp = self.client.get(loc.text, HTTP_HOST=self.site.domain)
        self.assertEqual(resp.status_code, 200)
        urlset = etree.fromstring(resp.content)
        # Check each job in the daily sitemap, crawling it like a bot.
        for loc, _, _, _ in urlset:
            resp = self.client.get(loc.text, HTTP_HOST=self.site.domain)
            self.assertEqual(resp.status_code, 200)
            self.assertIn(str(resp.context['the_job'].uid), loc.text)
            crawled_jobs += 1
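# The TODO above suggests freezing "now" so feed dates stay inside the
# sitemap's 30-day window. A minimal sketch of that idea with the mock
# library, run inside the test method; the patch target 'seo.sitemaps'
# and its `datetime` name are assumptions about where the sitemap code
# looks up the clock.
import datetime
import mock

fake_dt = mock.MagicMock(wraps=datetime.datetime)
fake_dt.now.return_value = datetime.datetime(2013, 1, 15)

with mock.patch('seo.sitemaps.datetime', fake_dt):
    resp = self.client.get('/sitemap.xml', HTTP_HOST=self.site.domain)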
def task_update_solr(jsid, **kwargs):
    import_jobs.update_solr(jsid, **kwargs)
def task_update_solr(jsid, **kwargs):
    try:
        import_jobs.update_solr(jsid, **kwargs)
    except Exception as e:
        # format_exc() reads the active exception itself; it does not
        # take the sys.exc_info() tuple.
        logging.error(traceback.format_exc())
        raise task_update_solr.retry(exc=e)
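# retry() also accepts per-call execution options; a hedged sketch of a
# growing backoff on top of the handler above (the countdown math and
# max_retries value are illustrative, not taken from the source).
@task(max_retries=5)
def task_update_solr(jsid, **kwargs):
    try:
        import_jobs.update_solr(jsid, **kwargs)
    except Exception as e:
        logging.error(traceback.format_exc())
        # Wait 60s, 120s, 240s, ... between successive attempts.
        delay = 60 * (2 ** task_update_solr.request.retries)
        raise task_update_solr.retry(exc=e, countdown=delay)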
def test_unicode_title(self):
    # Test imports
    group = factories.GroupFactory()
    self.site.group = group
    self.site.business_units.add(self.businessunit)
    self.site.save()

    import_jobs.update_solr(self.buid, download=False, delete_feed=False,
                            data_dir='seo/tests/data/')
    solr_jobs = self.conn.search("*:*")
    resp = self.client.get('/')
    self.assertEqual(resp.context['total_jobs_count'], solr_jobs.hits)

    # Test standard facets against a Haystack query.
    standard_cf = factories.CustomFacetFactory.build(
        # default facet will return both jobs
        name="Keyword Facet",
        group=group,
        show_production=True)
    standard_cf.save()
    standard_cf.keyword.add(u'Ключевые')
    standard_cf.save()
    standard_site_facet = factories.SeoSiteFacetFactory(
        seosite=self.site,
        customfacet=standard_cf,
        facet_type=factories.SeoSiteFacet.STANDARD)
    standard_site_facet.save()

    standard_cf2 = factories.CustomFacetFactory.build(
        name='Country Facet',
        country='United States',
        group=group,
        show_production=True)
    standard_cf2.save()
    standard_site_facet2 = factories.SeoSiteFacetFactory(
        seosite=self.site,
        customfacet=standard_cf2,
        facet_type=factories.SeoSiteFacet.STANDARD)
    standard_site_facet2.save()

    resp = self.client.get('/keyword-facet/new-jobs/',
                           HTTP_HOST=self.site.domain, follow=True)
    sqs = DESearchQuerySet().filter(text=u'Ключевые')
    self.assertEqual(len(resp.context['default_jobs']), sqs.count())
    for facet_widget in resp.context['widgets']:
        # Ensure that no standard facet has more results than the
        # current search results.
        for count_tuple in facet_widget.items:
            self.assertTrue(sqs.count() >= count_tuple[1])

    # Test default site facets against a PySolr query.
    from django.core.cache import cache
    cache.clear()
    default_cf = factories.CustomFacetFactory.build(
        name="Default Facet",
        title=u"Специалист",
        group=group,
        show_production=True)
    default_cf.save()
    default_site_facet = factories.SeoSiteFacetFactory(
        seosite=self.site,
        facet_type=factories.SeoSiteFacet.DEFAULT,
        customfacet=default_cf)
    default_site_facet.save()
    resp = self.client.get('/jobs/', HTTP_HOST=self.site.domain,
                           follow=True)
    total_jobs = resp.context['total_jobs_count']
    solr_jobs = self.conn.search(q=u"title:Специалист")
    self.assertEqual(total_jobs, solr_jobs.hits)
    self.assertEqual(len(resp.context['default_jobs']), total_jobs)
    for facet_widget in resp.context['widgets']:
        for count_tuple in facet_widget.items:
            self.assertTrue(sqs.count() >= count_tuple[1])

    # Feed test
    resp = self.client.get('/feed/json', HTTP_HOST=self.site.domain)
    jobs = json.loads(resp.content)
    self.assertEqual(len(jobs), total_jobs)
    for job in jobs:
        resp = self.client.get(job['url'], HTTP_HOST=self.site.domain,
                               follow=False)
        self.assertEqual(resp.status_code, 302)
        expected = 'https://my.jobs/%s%d?my.jobs.site.id=%s' % \
            (job['guid'], settings.FEED_VIEW_SOURCES['json'],
             str(self.site.pk))
        self.assertEqual(resp['Location'], expected)

    # Sitemap index test. Since the sitemap only builds out updates from
    # the last 30 days, this test will eventually be checking 0 jobs in
    # the sitemap.
    # TODO: find a way to keep feed dates current. We might be able to
    # use the mock library to override datetime functions.
    resp = self.client.get('/sitemap.xml', HTTP_HOST=self.site.domain)
    root = etree.fromstring(resp.content)
    self.assertGreater(len(root), 0)
    crawled_jobs = 0
    for loc, lastmod in root:
        self.assertTrue(loc.text)
        # Get the first daily sitemap.
        resp = self.client.get(loc.text, HTTP_HOST=self.site.domain)
        self.assertEqual(resp.status_code, 200)
        urlset = etree.fromstring(resp.content)
        # Check each job in the daily sitemap, crawling it like a bot.
        for loc, _, _, _ in urlset:
            resp = self.client.get(loc.text, HTTP_HOST=self.site.domain)
            self.assertEqual(resp.status_code, 200)
            self.assertIn(str(resp.context['the_job'].uid), loc.text)
            crawled_jobs += 1