Exemple #1
0
    def test_analytics_delete_old_data(self):
        """
        Updating Solr with analytics data should prune every document in
        the "current" collection that is older than 30 days.

        Two batches of logs are parsed (one dated 31 days ago, one dated
        today), the cleanup is run, and only the documents from today's
        batch are expected to remain.
        """
        solr = Solr()
        log_types = ('analytics', 'redirect')

        def query_analytics():
            # Every check in this test looks at the same set of documents
            return solr.search(q='doc_type:analytics')

        # First batch: logs dated 31 days in the past; these are the ones
        # the cleanup is expected to remove
        stale_logs = [
            MockLog(log_type=kind, delta=datetime.timedelta(days=-31))
            for kind in log_types
        ]
        parse_log(stale_logs, self.test_solr)
        found = query_analytics()
        self.assertEqual(found.hits, 2, 'Old logs were not added')
        old_uids = {entry['uid'] for entry in found.docs}

        # Second batch: logs carrying the default (current) timestamp
        fresh_logs = [MockLog(log_type=kind) for kind in log_types]
        parse_log(fresh_logs, self.test_solr)
        found = query_analytics()
        self.assertEqual(found.hits, 4, 'New logs were not added')
        all_uids = {entry['uid'] for entry in found.docs}

        # read_new_logs runs delete_old_analytics_docs after parse_logs;
        # nothing has triggered it so far, so invoke it directly
        delete_old_analytics_docs()
        found = query_analytics()
        self.assertEqual(found.hits, 2, 'Old logs were not deleted')
        new_uids = {entry['uid'] for entry in found.docs}

        # The surviving uids must share nothing with the stale batch ...
        overlap = old_uids & new_uids
        self.assertTrue(
            old_uids.isdisjoint(new_uids),
            'Sets are not disjoint; Intersecting elements: %s' % str(overlap))

        # ... and together the two batches must account for every uid that
        # was present before the cleanup
        combined = old_uids | new_uids
        self.assertEqual(
            combined,
            all_uids,
            'Sets are not equal; difference: %s' % str(combined ^ all_uids))
Exemple #2
0
    def test_analytics_log_parsing(self):
        """
        Ensure that analytics logs are parsed and stored in solr correctly.

        For each log type ('analytics' and 'redirect') this checks that:
        - exactly one document is stored for the mock log
        - only analytics documents carry the multi-valued 'facets' field,
          and every other field is single-valued
        - 'aguid' is a valid UUID, and 'User_user_guid' only appears once a
          user with a guid exists
        - the document's company id is either the company linked to the
          business unit or the default company (999999)
        """
        company = CompanyFactory(id=1)
        business_unit = BusinessUnitFactory(id=1000)
        company.job_source_ids.add(business_unit)

        # match and no_match will be used later to ensure that the correct
        # number of documents were associated with a company or associated
        # with the default company.
        # The lambdas read `doc` from this method's scope at call time; it is
        # bound by the `for doc in results.docs` loop below before either
        # mock is called.
        match = Mock(
            wraps=lambda: self.assertEqual(doc['company_id'], company.pk))
        no_match = Mock(
            wraps=lambda: self.assertEqual(doc['company_id'], 999999))

        for log_type in ['analytics', 'redirect']:
            log = MockLog(log_type=log_type)
            parse_log([log], self.test_solr)

            solr = Solr()
            results = solr.search(q='uid:analytics*')

            # fake logs contain two lines - one human and one bot hit
            # If it is getting processed correctly, there should be only one
            # hit recorded
            self.assertEqual(results.hits, 1)
            first_doc = results.docs[0]

            multi_field = 'facets'
            if log_type == 'redirect':
                # Redirect documents are expected to have no facets at all
                with self.assertRaises(KeyError):
                    first_doc[multi_field]
            else:
                self.assertEqual(len(first_doc[multi_field]), 2)

            # Every field other than the multi-valued one must be stored as
            # a single value. The previous check wrapped the comparison
            # inside type(), which is always truthy and so never failed.
            for field, value in first_doc.items():
                if field != multi_field:
                    self.assertNotIsInstance(
                        value, list,
                        'Field %r should not be multi-valued' % field)

            # uuid.UUID raises ValueError if the stored guid is malformed
            uuid.UUID(first_doc['aguid'])
            # No user exists yet, so no user guid should have been stored
            with self.assertRaises(KeyError):
                first_doc['User_user_guid']

            for doc in results.docs:
                if doc['job_view_buid'] == business_unit.pk:
                    # If business units match, company ids should match
                    match()
                else:
                    # Business units don't match; company id should be set to
                    # the default company
                    no_match()

            # Re-parse the same log now that a user with a guid exists; both
            # guid fields should then be present and valid
            solr.delete()
            user = UserFactory(email="*****@*****.**")
            user.user_guid = '1e5f7e122156483f98727366afe06e0b'
            user.save()
            parse_log([log], self.test_solr)
            results = solr.search(q='uid:analytics*')
            for guid in ['aguid', 'User_user_guid']:
                uuid.UUID(results.docs[0][guid])

            # Reset state so the next log type starts from an empty index
            solr.delete()
            user.delete()

        # We have already determined that there are only two documents.
        # Ensure that there is exactly one document that matches a specific
        # company and one document that was given the default company
        self.assertEqual(match.call_count, 1)
        self.assertEqual(no_match.call_count, 1)