def analytics(employer, company, candidate):
    """
    Return the Solr analytics documents for a candidate within a company.

    Raises Http404 unless *employer* is one of *company*'s admins.
    """
    if employer not in company.admins.all():
        raise Http404

    # hasattr(mail, 'outbox') is only true under Django's test runner
    # (the locmem email backend adds an outbox attribute), so tests hit
    # the test Solr instance instead of the live one.
    instance = (settings.TEST_SOLR_INSTANCE if hasattr(mail, 'outbox')
                else settings.SOLR)

    query = (Solr(instance['current'])
             .add_filter_query('company_id:%d' % company.pk)
             .add_query('User_id:%d' % candidate.pk))
    return query.search().docs
def test_savedsearch_no_user(self):
    """
    A saved search with a null recipient should never be inserted in Solr
    """
    solr = Solr()

    # Baseline: a search with a real recipient indexes two documents.
    # One hit for the search, one for its recipient
    SavedSearchFactory(user=self.user)
    update_solr_task(self.test_solr)
    self.assertEqual(solr.search(q='*:*').hits, 2)
    solr.delete()

    # A recipient-less search serializes to nothing and indexes nothing.
    search = SavedSearchFactory(user=None)
    self.assertEqual(object_to_dict(SavedSearch, search), None)
    update_solr_task(self.test_solr)
    self.assertEqual(solr.search(q='*:*').hits, 0)
def test_analytics_delete_old_data(self):
    """
    When Solr is updated with analytics data, we should delete all docs
    from the "current" collection older than 30 days.
    """
    solr = Solr()

    # Logs older than the 30-day window; these should be pruned later.
    stale_logs = [MockLog(log_type=kind,
                          delta=datetime.timedelta(days=-31))
                  for kind in ['analytics', 'redirect']]
    parse_log(stale_logs, self.test_solr)
    results = solr.search(q='doc_type:analytics')
    self.assertEqual(results.hits, 2, 'Old logs were not added')
    old_uids = {doc['uid'] for doc in results.docs}

    # Logs timestamped for today; these should survive pruning.
    fresh_logs = [MockLog(log_type=kind)
                  for kind in ['analytics', 'redirect']]
    parse_log(fresh_logs, self.test_solr)
    results = solr.search(q='doc_type:analytics')
    self.assertEqual(results.hits, 4, 'New logs were not added')
    all_uids = {doc['uid'] for doc in results.docs}

    # delete_old_analytics_docs is called after parse_logs in
    # read_new_logs and has not been called yet. Call it now.
    delete_old_analytics_docs()
    results = solr.search(q='doc_type:analytics')
    self.assertEqual(results.hits, 2, 'Old logs were not deleted')
    new_uids = {doc['uid'] for doc in results.docs}

    # Ensure the correct documents were added/removed: the stale and
    # surviving uid sets must share no elements...
    self.assertTrue(old_uids.isdisjoint(new_uids),
                    'Sets are not disjoint; Intersecting elements: %s'
                    % str(old_uids.intersection(new_uids)))

    # ...and together they must account for every uid ever indexed.
    self.assertEqual(old_uids.union(new_uids), all_uids,
                     'Sets are not equal; difference: %s'
                     % str(old_uids.union(new_uids).symmetric_difference(
                         all_uids)))
def filter_by_microsite(company, user_solr=None, facet_solr=None):
    """
    Applies solr filters based on company.

    inputs:
    :company: The company whose SavedSearches should be filtered on.
    :user_solr: A Solr instance used for retrieving SavedSearch documents
        from solr.
    :facet_solr: A Solr instance used for retrieving facets based on
        ProfileUnits data.

    outputs:
    user_solr and facet_solr filtered by the company.
    """
    user_solr = Solr() if not user_solr else user_solr
    facet_solr = Solr() if not facet_solr else facet_solr

    # Restrict to saved searches belonging to this company.
    query = 'SavedSearch_company_id:%s' % company.pk
    # NOTE(review): user_solr gets a filter query while facet_solr gets a
    # plain query (filter_by_domain filters both) — preserved as-is;
    # confirm this asymmetry is intentional.
    user_solr = user_solr.add_filter_query(query)
    facet_solr = facet_solr.add_query(query)

    # Only include users who have opted in to employer communications.
    user_solr = user_solr.add_filter_query('User_opt_in_employers:true')
    facet_solr = facet_solr.add_query('User_opt_in_employers:true')
    return user_solr, facet_solr
def filter_by_domain(domain, user_solr=None, facet_solr=None):
    """
    Applies solr filters to SavedSearch.url based on a domain.

    inputs:
    :domain: The domain to filter the SavedSearches on.
    :user_solr: A Solr instance used for retrieving SavedSearch documents
        from solr.
    :facet_solr: A Solr instance used for retrieving facets based on
        ProfileUnits data.

    outputs:
    user_solr and facet_solr filtered by the domain.
    """
    user_solr = Solr() if not user_solr else user_solr
    facet_solr = Solr() if not facet_solr else facet_solr

    # Wildcard match so any feed url containing the domain qualifies.
    query = 'SavedSearch_feed:*%s*' % domain
    user_solr = user_solr.add_filter_query(query)
    facet_solr = facet_solr.add_filter_query(query)
    return user_solr, facet_solr
def apply_facets_and_filters(request, user_solr=None, facet_solr=None):
    """
    Applies facets to solr based on filters currently applied and creates
    a dictionary of removable terms and the resulting url with the term
    removed.

    inputs:
    :user_solr: A Solr instance used for retrieving SavedSearch documents
        from solr.
    :facet_solr: A Solr instance used for retrieving facets based on
        ProfileUnits data.
    :loc_solr: A solr instance used for retrieving facets based on
        location slabs.

    outputs:
    user_solr, facet_solr, and loc_solr appropriately filtered as well as
    a dictionary of {'the applied filter': 'resulting url if the filter
    were removed'}
    """
    url = request.build_absolute_uri()
    date_start = request.REQUEST.get('date_start', None)
    date_end = request.REQUEST.get('date_end', None)
    # Keep any date range on the "remove this filter" urls built below.
    if date_start:
        url = update_url_param(url, 'date_start',
                               request.REQUEST.get('date_start'))
    if date_end:
        url = update_url_param(url, 'date_end',
                               request.REQUEST.get('date_end'))
    # Maps each applied filter's display term to the url with it removed.
    filters = {}
    user_solr = Solr() if not user_solr else user_solr
    facet_solr = Solr() if not facet_solr else facet_solr

    # The location parameter should be "Country-Region-City". Faceting
    # is determined on what is already in the location parameter.
    # No location facets on country, country parameter facets on state, and
    # country-state facets on city.
    if not 'location' in request.GET:
        facet_solr = facet_solr.add_facet_field('Address_country_code')
        facet_solr = facet_solr.add_facet_field('Address_region')
    else:
        fieldname = 'Address_full_location'
        term = urllib.unquote(request.GET.get('location'))
        # Indexed locations use "##" as the segment separator; spaces
        # are escaped for the solr query parser.
        search_term = term.replace("-", "##").replace(" ", "\ ")
        if len(term.split("-")) == 3:
            # Full Country-Region-City: exact match.
            q = '%s:%s' % (fieldname, search_term)
        else:
            # Partial location: prefix match on the remaining segments.
            q = '%s:%s##*' % (fieldname, search_term)
        user_solr = user_solr.add_query(q)
        facet_solr = facet_solr.add_filter_query(q)
        term_list = term.split("-")
        term_len = len(term_list)
        prefix = '%s##' % term.replace('-', '##')
        if term_len == 3:
            # Country, Region, City included. No reason to facet on
            # location.
            city = term_list[2] if term_list[2] else 'None'
            region = term_list[1] if term_list[1] else 'None'
            country = term_list[0]
            remove_term = "%s, %s, %s" % (city, region, country)
            # Removing the city narrows location back to Country-Region.
            new_val = "%s-%s" % (term_list[0], term_list[1])
            filters[remove_term] = update_url_param(url, 'location',
                                                    new_val)
        elif term_len == 2:
            # Country, Region included.
            region = term_list[1] if term_list[1] else 'None'
            country = term_list[0]
            remove_term = "%s, %s" % (region, country)
            filters[remove_term] = update_url_param(url, 'location',
                                                    term_list[0])
            facet_solr = facet_solr.add_facet_field(fieldname)
            facet_solr = facet_solr.add_facet_prefix(prefix,
                                                     fieldname=fieldname)
        elif term_len == 1:
            # Country included.
            country = country_codes.get(term_list[0], term_list[0])
            filters[country] = remove_param_from_url(url, 'location')
            facet_solr = facet_solr.add_facet_field('Address_region')
            facet_solr = facet_solr.add_facet_prefix(prefix,
                                                     fieldname=fieldname)

    # Education: facet when unfiltered; otherwise filter both solrs and
    # record the removable term (human-readable via edu_codes).
    if not 'education' in request.GET:
        facet_solr = facet_solr.add_facet_field(
            'Education_education_level_code')
    else:
        term = urllib.unquote(request.GET.get('education'))
        term_name = edu_codes.get(int(term))
        filters[term_name] = remove_param_from_url(url, 'education')
        q = 'Education_education_level_code:%s' % term
        user_solr = user_solr.add_query(q)
        facet_solr = facet_solr.add_filter_query(q)

    # License: same facet-or-filter pattern as education.
    if not 'license' in request.GET:
        facet_solr = facet_solr.add_facet_field('License_license_name')
    else:
        term = urllib.unquote(request.GET.get('license'))
        filters[term] = remove_param_from_url(url, 'license')
        q = 'License_license_name:"%s"' % term
        user_solr = user_solr.add_query(q)
        facet_solr = facet_solr.add_filter_query(q)
    return user_solr, facet_solr, filters
def test_analytics_log_parsing(self):
    """
    Ensure that analytics logs are parsed and stored in solr correctly
    """
    company = CompanyFactory(id=1)
    business_unit = BusinessUnitFactory(id=1000)
    company.job_source_ids.add(business_unit)

    # match and no_match will be used later to ensure that the correct
    # number of documents were associated with a company or associated
    # with the default company
    match = Mock(
        wraps=lambda: self.assertEqual(doc['company_id'], company.pk))
    no_match = Mock(
        wraps=lambda: self.assertEqual(doc['company_id'], 999999))

    for log_type in ['analytics', 'redirect']:
        log = MockLog(log_type=log_type)
        parse_log([log], self.test_solr)

        solr = Solr()
        results = solr.search(q='uid:analytics*')

        # fake logs contain two lines - one human and one bot hit
        # If it is getting processed correctly, there should be only one
        # hit recorded
        self.assertEqual(results.hits, 1)

        multi_field = 'facets'
        if log_type == 'redirect':
            with self.assertRaises(KeyError):
                results.docs[0][multi_field]
        else:
            self.assertEqual(len(results.docs[0][multi_field]), 2)

        for field in results.docs[0].keys():
            if field != multi_field:
                # BUG FIX: original read type(value != list), i.e.
                # type(bool) — always truthy, so the assertion could
                # never fail. Assert the value itself is not a list.
                self.assertTrue(type(results.docs[0][field]) != list)

        uuid.UUID(results.docs[0]['aguid'])
        # No user is associated yet, so no user guid should be stored.
        with self.assertRaises(KeyError):
            results.docs[0]['User_user_guid']

        for doc in results.docs:
            if doc['job_view_buid'] == business_unit.pk:
                # If business units match, company ids should match
                match()
            else:
                # Business units don't match; company id should be set to
                # the default company
                no_match()

        solr.delete()

        # Re-parse with a known user present; the doc should now carry
        # that user's guid as well.
        user = UserFactory(email="*****@*****.**")
        user.user_guid = '1e5f7e122156483f98727366afe06e0b'
        user.save()
        parse_log([log], self.test_solr)
        results = solr.search(q='uid:analytics*')
        for guid in ['aguid', 'User_user_guid']:
            uuid.UUID(results.docs[0][guid])
        solr.delete()
        user.delete()

    # We have already determined that there are only two documents.
    # Ensure that there is exactly one document that matches a specific
    # company and one document that was given the default company
    self.assertEqual(match.call_count, 1)
    self.assertEqual(no_match.call_count, 1)
def dashboard(request, template="mydashboard/mydashboard.html",
              extra_context=None):
    """
    Returns a list of candidates who created a saved search for one of the
    microsites within the company microsite list or with the company name
    like jobs.jobs/company_name/careers for example between the given
    (optional) dates

    Inputs:
    :company: company.id that is associated with request.user

    Returns:
    :render_to_response: renders template with context dict
    """
    # hasattr(mail, 'outbox') is only true under Django's test runner,
    # which points searches at the test Solr instance.
    if hasattr(mail, 'outbox'):
        solr = settings.TEST_SOLR_INSTANCE
    else:
        solr = settings.SOLR
    company = get_company(request)
    if not company:
        raise Http404
    user_solr = Solr(solr['current'])
    facet_solr = Solr(solr['current'])

    # Add join only if we're using facets, not if we're simply searching.
    query_params = {'search', 'company'}
    if not query_params.issuperset({q_key for q_key
                                    in request.GET.keys()
                                    if q_key not in ['querystring_key',
                                                     'date_end',
                                                     'date_start',
                                                     'page']}):
        user_solr = user_solr.add_join(from_field='ProfileUnits_user_id',
                                      to_field='User_id')
        facet_solr = facet_solr.add_join(from_field='User_id',
                                        to_field='ProfileUnits_user_id')
    # facet_solr is only used for facet counts, never documents.
    facet_solr = facet_solr.rows_to_fetch(0)

    authorized_microsites, buids = get_company_microsites(company)
    admins = CompanyUser.objects.filter(company=company.id)
    # Removes main user from admin list to display other admins
    admins = admins.exclude(user=request.user)
    requested_microsite = request.REQUEST.get('microsite', '')
    requested_date_button = request.REQUEST.get('date_button', False)
    candidates_page = request.REQUEST.get('page', 1)

    # the url value for 'All' in the select box is company name
    # which then gets replaced with all microsite urls for that company
    site_name = ''
    if requested_microsite != '':
        active_microsites = authorized_microsites.filter(
            domain__startswith=requested_microsite)
        user_solr, facet_solr = filter_by_domain(requested_microsite,
                                                 user_solr, facet_solr)
    else:
        active_microsites = authorized_microsites
        site_name = company.name

    if not site_name:
        try:
            site_name = active_microsites[0]
        except IndexError:
            site_name = ''

    active_microsites = set(active_microsites)

    # Date-range filter applies to both candidate docs and facets.
    rng, date_start, date_end, date_display = filter_by_date(request)
    user_solr = user_solr.add_filter_query(rng)
    facet_solr = facet_solr.add_query(rng)
    if request.GET.get('search', False):
        user_solr = user_solr.add_query("%s" % request.GET['search'])
        facet_solr = facet_solr.add_query("%s" % request.GET['search'])

    user_solr, facet_solr = filter_by_microsite(company, user_solr,
                                                facet_solr)
    (user_solr, facet_solr, filters) = apply_facets_and_filters(
        request, user_solr, facet_solr)
    solr_results = user_solr.rows_to_fetch(100).search()

    # List of dashboard widgets to display.
    dashboard_widgets = ["home_views", "search_views", "job_views",
                         "apply_clicks", "candidates", "search",
                         "applied_filters", "filters"]

    # Date button highlighting
    if 'today' in request.REQUEST:
        requested_date_button = 'today'
    elif 'seven_days' in request.REQUEST:
        requested_date_button = 'seven_days'
    elif 'thirty_days' in request.REQUEST:
        requested_date_button = 'thirty_days'

    url = request.build_absolute_uri()
    facets = parse_facets(facet_solr.search(), request)
    context = {
        'admin_you': request.user,
        'applied_filters': filters,
        'candidates_page': candidates_page,
        'company_admins': admins,
        'company_id': company.id,
        'company_microsites': authorized_microsites,
        'company_name': company.name,
        'dashboard_widgets': dashboard_widgets,
        'date_button': requested_date_button,
        'date_display': date_display,
        'date_end': date_end,
        'date_start': date_start,
        'date_submit_url': url,
        'facets': facets,
        'site_name': site_name,
        'view_name': 'Company Dashboard',
    }

    results = solr_results.docs
    # Maps analytics page_category values to context key suffixes
    # (context keys become e.g. 'total_job_view').
    facet_var_map = {
        'home': 'home',
        'listing': 'job_view',
        'results': 'search',
        'redirect': 'apply',
    }
    analytics_solr = Solr(solr['current']).add_facet_field(
        'page_category')
    if requested_microsite:
        analytics_solr = analytics_solr.add_query('domain:%s' %
                                                  requested_microsite)
    else:
        analytics_solr = analytics_solr.add_query('company_id:%d' %
                                                  company.pk)
    # Reuse the dashboard's date range, but against the analytics
    # documents' view_date field.
    rng = filter_by_date(request, field='view_date')[0]
    analytics_solr = analytics_solr.add_filter_query(rng)
    all_results = analytics_solr.rows_to_fetch(0).search()
    analytics_facets = all_results.facets.get('facet_fields', {}).get(
        'page_category', [])
    facet_dict = sequence_to_dict(analytics_facets)
    for key in facet_dict.keys():
        context_key = 'total_%s' % facet_var_map.get(key, '')
        context[context_key] = facet_dict[key]

    # Second pass: only authenticated hits (User_id present), faceted by
    # domain and restricted to this company's active microsites.
    analytics_solr = analytics_solr.add_filter_query('User_id:[* TO *]')
    analytics_solr = analytics_solr.add_facet_field('domain')
    auth_results = analytics_solr.search()
    analytics_facet_list = auth_results.facets.get(
        'facet_fields', {}).get('domain', [])
    analytics_facets = sequence_to_dict(analytics_facet_list)
    analytics_facets = [domain for domain in analytics_facets.items()
                        if domain[0] in active_microsites]
    results += auth_results.docs

    candidates = dict_to_object(results)
    # Per-domain saved-search counts, merged with the analytics counts.
    domains = [get_domain(c.SavedSearch_feed) for c in candidates
               if hasattr(c, 'SavedSearch_feed')]
    search_facets = [(domain, domains.count(domain))
                     for domain in set(domains)]
    domain_facets = {}
    # NOTE(review): groupby only merges adjacent entries; a domain in
    # both analytics_facets and search_facets non-adjacently would have
    # its earlier sum overwritten — confirm whether the inputs are
    # guaranteed disjoint or sorted.
    for domain, group in groupby(analytics_facets + search_facets,
                                 key=lambda x: x[0]):
        domain_facets[domain] = sum(item[1] for item in group)

    # Newest activity first: saved-search creation time if present,
    # otherwise the analytics view date.
    candidate_list = sorted(candidates,
                            key=lambda y: y.SavedSearch_created_on
                            if hasattr(y, 'SavedSearch_created_on')
                            else y.view_date,
                            reverse=True)
    context['domain_facets'] = domain_facets
    context['candidates'] = candidate_list
    context['total_candidates'] = len([x for x in groupby(
        candidate_list, key=lambda y: y.User_id)])
    if extra_context is not None:
        context.update(extra_context)
    return render_to_response(template, context,
                              context_instance=RequestContext(request))