Example #1
0
 def test_dev2_feed(self):
     """A parsed feed exposes the expected jsid, source name, and job count."""
     feed = DEv2JobFeed(download_feed_file(self.buid_id))
     parsed = feed.jobparse()
     # The feed's identifiers must match the business unit under test.
     self.assertEqual(feed.jsid, self.buid_id)
     self.assertEqual(feed.job_source_name, self.businessunit.title)
     # Every job in the feed file should have been parsed out.
     self.assertEqual(self.numjobs, len(parsed))
 def test_dev2_feed(self):
     """Check jsid, job_source_name, and job count on a parsed DEv2 feed."""
     feed_file = download_feed_file(self.buid_id)
     job_feed = DEv2JobFeed(feed_file)
     job_list = job_feed.jobparse()
     # Assertions ordered from cheapest to most specific; all independent.
     self.assertEqual(len(job_list), self.numjobs)
     self.assertEqual(job_feed.job_source_name, self.businessunit.title)
     self.assertEqual(job_feed.jsid, self.buid_id)
 def test_salt_date(self):
     """
     Sorting by `salted_date` (quasi-random) must yield the same set of
     documents as sorting strictly by `date_new`; salting only changes
     ordering, so only set equality can be asserted.

     """
     feed = DEv2JobFeed(download_feed_file(self.buid_id))
     self.conn.add(feed.solr_jobs())
     salted = self.conn.search(q="*:*", sort="salted_date asc")
     self.assertEqual(self.numjobs, salted.hits)
     # The two orderings can coincide by chance, so we cannot assert
     # inequality of the ordered lists — only membership is compared.
     by_date = self.conn.search(q="*:*", sort="date_new asc")
     self.assertItemsEqual(by_date.docs, salted.docs)
    def test_zipcode(self):
        """
        Tests to ensure proper behavior of zipcode field in being entered in
        Solr.

        The expected zipcodes are hard-coded from the known contents of the
        test feed file; a posting with no zipcode maps to None.
        """
        filepath = download_feed_file(self.buid_id)
        dbresults = DEv2JobFeed(filepath)
        solrresults = dbresults.solr_jobs()

        zips_from_feedfile = ['30269', '30269', '48332', '30269', '30269',
                              '30269', '30269', '30269', '48332', '48332',
                              '30269', None, '30269', '30269']

        solrzips = [i['zipcode'] for i in solrresults]
        # The original looped `for coll in [solrzips]` — a pointless loop
        # over a one-element list; assert the single collection directly.
        self.assertItemsEqual(zips_from_feedfile, solrzips)
Example #5
0
    def test_salt_date(self):
        """
        Job postings sorted by the quasi-random `salted_date` field must
        contain exactly the same documents as postings sorted strictly by
        `date_new`.

        """
        feed_path = download_feed_file(self.buid_id)
        job_feed = DEv2JobFeed(feed_path)
        indexed = job_feed.solr_jobs()
        self.conn.add(indexed)
        salted_results = self.conn.search(q="*:*", sort="salted_date asc")
        self.assertEqual(self.numjobs, salted_results.hits)
        # Two orderings occasionally coincide, so inequality of the ordered
        # lists cannot be tested — compare contents only.
        date_results = self.conn.search(q="*:*", sort="date_new asc")
        self.assertItemsEqual(date_results.docs, salted_results.docs)
Example #6
0
    def test_zipcode(self):
        """
        The `zipcode` field of each generated Solr document must match the
        zipcodes present in the test feed file (None for postings without
        one).

        """
        feed_path = download_feed_file(self.buid_id)
        job_feed = DEv2JobFeed(feed_path)
        solr_docs = job_feed.solr_jobs()

        # Hard-coded from the known contents of the test feed file.
        expected_zips = ['30269', '30269', '48332', '30269', '30269',
                         '30269', '30269', '30269', '48332', '48332',
                         '30269', None, '30269', '30269']

        actual_zips = [doc['zipcode'] for doc in solr_docs]
        for collection in [actual_zips]:
            self.assertItemsEqual(expected_zips, collection)
 def test_mocids(self):
     """
     Every job imported from a feed carries a `mocid` field, both in the
     generated Solr documents and once added to a Solr connection.

     """
     feed = DEv2JobFeed(download_feed_file(self.buid_id))
     solr_docs = feed.solr_jobs()
     # Add to the local Solr connection only, so production data is never
     # touched while the index is mutated.
     self.conn.add(solr_docs)
     hit_count = self.conn.search(q="*:*",
                                  fq="buid:%s -mocid:[* TO *]" % self.buid_id)
     self.assertEqual(hit_count.hits, self.numjobs)
     for doc in solr_docs:
         self.assertIn('mocid', doc)
Example #8
0
    def test_mocids(self):
        """
        Jobs imported from a feed and added to a Solr connection must all
        include a mocid field.

        """
        feed_path = download_feed_file(self.buid_id)
        parsed_feed = DEv2JobFeed(feed_path)
        solr_jobs = parsed_feed.solr_jobs()
        # Use the local Solr connection so this test cannot accidentally
        # alter production data while adding/updating documents.
        self.conn.add(solr_jobs)
        # No indexed job for this BUID should be missing a mocid value.
        response = self.conn.search(q="*:*",
                                    fq="buid:%s -mocid:[* TO *]" % self.buid_id)
        self.assertEqual(self.numjobs, response.hits)
        for solr_job in solr_jobs:
            self.assertTrue('mocid' in solr_job)
    def test_date_updated(self):
        """
        Test to ensure proper behavior of date updated field when added to
        Solr.

        """
        feed_path = download_feed_file(self.buid_id)
        job_feed = DEv2JobFeed(feed_path)
        solr_docs = job_feed.solr_jobs()
        self.conn.add(solr_docs)
        # Timestamps known to be present in the test feed file.
        stamps = ["4/16/2015 11:35:13 PM",
                  "4/16/2015 11:35:14 PM",
                  "4/16/2015 11:35:15 PM"]
        allowed = [datetime.datetime.strptime(s, "%m/%d/%Y %I:%M:%S %p")
                   for s in stamps]
        for doc in solr_docs:
            self.assertIn(doc['date_updated'], allowed)
Example #10
0
    def test_date_updated(self):
        """
        Each indexed job's `date_updated` must be one of the timestamps
        known to exist in the test feed file.

        """
        parse = datetime.datetime.strptime
        fmt = "%m/%d/%Y %I:%M:%S %p"
        feed = DEv2JobFeed(download_feed_file(self.buid_id))
        docs = feed.solr_jobs()
        self.conn.add(docs)
        valid_dates = [parse("4/16/2015 11:35:13 PM", fmt),
                       parse("4/16/2015 11:35:14 PM", fmt),
                       parse("4/16/2015 11:35:15 PM", fmt)]
        for updated in [d['date_updated'] for d in docs]:
            self.assertIn(updated, valid_dates)
Example #11
0
def seoxml_to_mongo(buid, data_dir=DATA_DIR):
    """
    Download the feed file for `buid`, parse it, and upsert each job into
    the Mongo `jobs` collection, matching on the job's `guid`.

    Raises:
        FeedImportError: if the feed file fails validation.
    """
    filepath = download_feed_file(buid, data_dir=data_dir)

    jobfeed = DEv2JobFeed(filepath, jsid=buid, markdown=False, company=None)
    # Abort on an invalid feed file. NOTE(review): an earlier comment said
    # this returned (0, 0) to mimic a successful parse, but the code raises
    # FeedImportError — the comment has been corrected to match the code.
    if jobfeed.errors:
        error = jobfeed.error_messages
        logging.error("BUID:%s - Feed file has failed validation on line %s. "
                      "Exception: %s" %
                      (buid, error['line'], error['exception']))
        raise FeedImportError(error)

    # jobparse() is called for its side effects on the feed object; its
    # return value is unused here.
    jobfeed.jobparse()
    jobs = jobfeed.solr_jobs()

    # PyMongo raises InvalidOperation when executing an empty bulk op, so
    # skip the write entirely when the feed produced no jobs.
    if not jobs:
        return

    collection = connect_db().db.jobs
    bulk = collection.initialize_unordered_bulk_op()
    for job in jobs:
        bulk.find({'guid': job['guid']}).upsert().replace_one(job)
    bulk.execute()
Example #12
0
def seoxml_to_mongo(buid, data_dir=DATA_DIR):
    """
    Fetch and parse the DEv2 feed for `buid`, then upsert every parsed job
    into the Mongo `jobs` collection, matching on `guid`.
    """
    feed_file = download_feed_file(buid, data_dir=data_dir)

    feed = DEv2JobFeed(feed_file, jsid=buid, markdown=False,
                       company=None)
    # A feed file that fails validation is logged and aborts the import.
    if feed.errors:
        err = feed.error_messages
        logging.error("BUID:%s - Feed file has failed validation on line %s. "
                      "Exception: %s" % (buid, err['line'],
                                         err['exception']))
        raise FeedImportError(err)

    feed.jobparse()  # populates feed state; return value unused here
    job_docs = feed.solr_jobs()

    jobs_collection = connect_db().db.jobs
    bulk_op = jobs_collection.initialize_unordered_bulk_op()
    for doc in job_docs:
        bulk_op.find({'guid': doc['guid']}).upsert().replace_one(doc)
    bulk_op.execute()
 def _get_feedfile(self):
     # Fetch the real feed file, keep a .bak copy of it, then overwrite
     # the original with the empty-feed fixture.
     downloaded = download_feed_file(self.buid_id)
     backup_path = "%s.bak" % downloaded
     shutil.copyfile(downloaded, backup_path)
     shutil.copyfile(self.emptyfeed, downloaded)
Example #14
0
 def _get_feedfile(self):
     # Replace the downloaded feed file with the empty-feed fixture,
     # first saving the original next to it with a .bak suffix.
     feed_path = download_feed_file(self.buid_id)
     shutil.copyfile(feed_path, "%s.bak" % feed_path)
     shutil.copyfile(self.emptyfeed, feed_path)