def jobsfs_to_mongo(guid, buid, name):
    """Composed method for responding to a guid update."""
    assert re.match(r'^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$',
                    guid.upper()), \
        "%s is not a valid guid" % guid
    assert re.match(r'^\d+$', str(buid)), "%s is not a valid buid" % buid

    logger.info("Updating Job Source %s", guid)

    # Make the BusinessUnit and Company
    create_businessunit(buid)
    bu = BusinessUnit.objects.get(id=buid)
    bu.title = name
    bu.save()
    add_company(bu)

    # Look up the jobs, filter them, transform them, and then load them.
    zf = get_jobsfs_zipfile(guid)
    jobs = get_jobs_from_zipfile(zf, guid)
    jobs = filter_current_jobs(jobs, bu)
    jobs = (hr_xml_to_json(job, bu) for job in jobs)
    jobs = list(jobs)
    for job in jobs:
        job['guid'] = job['guid'].lower()

    if len(jobs) > 0:
        collection = connect_db().db.jobs
        bulk = collection.initialize_unordered_bulk_op()
        for job in jobs:
            bulk.find({'guid': job['guid']}).upsert().replace_one(job)
        bulk.execute()

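# A standalone sketch of the same guid-keyed upsert pattern on pymongo 3+,
# where bulk_write/ReplaceOne supersede the legacy
# initialize_unordered_bulk_op() used above. The database and collection
# names here are hypothetical, not from this codebase.
from pymongo import MongoClient, ReplaceOne

def upsert_jobs_by_guid(jobs, mongo_uri='mongodb://localhost:27017'):
    collection = MongoClient(mongo_uri)['jobs_db']['jobs']
    # upsert=True inserts documents whose guid is new and replaces ones
    # whose guid already exists, mirroring the bulk loop above.
    requests = [ReplaceOne({'guid': job['guid']}, job, upsert=True)
                for job in jobs]
    if requests:
        collection.bulk_write(requests, ordered=False)
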
def update_job_source(guid, buid, name, clear_cache=False):
    """Composed method for responding to a guid update."""
    logger.info("Updating Job Source %s", guid)

    # Make the BusinessUnit and Company
    create_businessunit(buid)
    bu = BusinessUnit.objects.get(id=buid)
    bu.title = name
    bu.save()
    add_company(bu)

    # Look up the jobs, filter them, transform them, and then load them.
    zf = get_jobsfs_zipfile(guid)
    jobs = get_jobs_from_zipfile(zf, guid)
    jobs = filter_current_jobs(jobs, bu)
    jobs = [hr_xml_to_json(job, bu) for job in jobs]
    for job in jobs:
        job['link'] = make_redirect(job, bu).make_link()

    add_jobs(jobs)
    remove_expired_jobs(buid, jobs)

    # Update business information
    bu.associated_jobs = len(jobs)
    bu.date_updated = datetime.datetime.utcnow()
    bu.save()

    if clear_cache:
        # Clear the cache in 25 minutes to allow for solr replication.
        tasks.task_clear_bu_cache.delay(buid=bu.id, countdown=1500)

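# Hedged usage sketch (not part of the original module): the guid, buid, and
# name below are hypothetical values, and a real run assumes Django settings,
# solr, and JobsFS access are already configured. clear_cache=True defers
# task_clear_bu_cache by 25 minutes so solr replication can finish first.
if __name__ == '__main__':
    update_job_source('01234567-89AB-CDEF-0123-456789ABCDEF', 12345,
                      'Example Business Unit', clear_cache=True)
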
def test_salted_date_is_based_on_date_new(self):
    add_company(self.businessunit)
    transformed_job = hr_xml_to_json(self.jobs[0], self.businessunit)
    expected = datetime.datetime.strptime("2016-07-02", "%Y-%m-%d").date()
    actual = transformed_job['salted_date'].date()
    self.assertEqual(expected, actual,
                     "salted_date should fall on the same date as date_new: "
                     "%s is not %s" % (actual, expected))

def test_dates_have_timezones(self):
    """Assert that dates on imports can be resolved to specific UTC times."""
    with open(self.document) as f:
        etree = lxml.etree.fromstring(f.read())
    result = transform.hr_xml_to_json(etree, self.bu)

    # Check date_updated
    date_updated = result['date_updated']
    # Assert it has a timezone
    self.assertIsNotNone(date_updated.tzinfo,
                         msg="The date_updated should have a "
                             "timezone associated with it.")
    # Assert the datetime is correct when converted to UTC.
    actual_utc = date_updated.astimezone(pytz.UTC)
    expected = datetime.datetime(2016, 1, 27, 20, 57, 3, 997000, pytz.UTC)
    self.assertEqual(
        actual_utc, expected,
        msg="date_updated is '%s', it should equal '%s'" % (
            actual_utc, expected.isoformat()))

    # Check date_new
    date_new = result['date_new']
    # Assert it has a timezone
    self.assertIsNotNone(date_new.tzinfo,
                         msg="The date_new should have a "
                             "timezone associated with it.")
    # Assert the datetime is correct when converted to UTC.
    actual_utc = date_new.astimezone(pytz.UTC)
    expected = datetime.datetime(2016, 1, 27, 20, 57, 3, 997000, pytz.UTC)
    self.assertEqual(
        actual_utc, expected,
        msg="date_new is '%s', it should equal '%s'" % (
            actual_utc, expected.isoformat()))

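# A minimal sketch of the timezone behavior this test asserts (assumption:
# the real hr_xml_to_json may parse dates differently). An offset-bearing
# ISO-8601 timestamp parses to an aware datetime that normalizes cleanly
# to UTC:
import datetime

import pytz
from dateutil import parser

stamp = parser.parse('2016-01-27T15:57:03.997-05:00')
assert stamp.tzinfo is not None  # aware, not naive
assert stamp.astimezone(pytz.UTC) == datetime.datetime(
    2016, 1, 27, 20, 57, 3, 997000, pytz.UTC)
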
def update_job_source(guid, buid, name, clear_cache=False):
    """Composed method for responding to a guid update."""
    assert re.match(r'^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$',
                    guid.upper()), \
        "%s is not a valid guid" % guid
    assert re.match(r'^\d+$', str(buid)), "%s is not a valid buid" % buid

    logger.info("Updating Job Source %s", guid)

    # Make the BusinessUnit and Company
    create_businessunit(buid)
    bu = BusinessUnit.objects.get(id=buid)
    bu.title = name
    bu.save()
    add_company(bu)

    # Look up the jobs, filter them, transform them, and then load them.
    zf = get_jobsfs_zipfile(guid)
    jobs = get_jobs_from_zipfile(zf, guid)
    jobs = filter_current_jobs(jobs, bu)
    jobs = (hr_xml_to_json(job, bu) for job in jobs)
    jobs = (add_redirect(job, bu) for job in jobs)

    # AT&T showed that large numbers of MOCs can cause import issues due to
    # the size of the documents. Therefore, lower the document chunk size
    # when processing AT&T.
    if int(buid) == 19389:
        logger.warn("AT&T has a large number of mapped_mocs, which causes "
                    "problems. Reducing chunk size.")
        upload_chunk_size = 64
    else:
        upload_chunk_size = 1024

    job_ids = add_jobs(jobs, upload_chunk_size)
    remove_expired_jobs(buid, job_ids)

    # Update business information
    bu.associated_jobs = len(job_ids)
    bu.date_updated = datetime.datetime.utcnow()
    bu.save()

    if clear_cache:
        # Clear the cache in 25 minutes to allow for solr replication.
        tasks.task_clear_bu_cache.delay(buid=bu.id, countdown=1500)

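# Hypothetical sketch of how a chunked loader like add_jobs might drain the
# job generator (the real add_jobs lives elsewhere in this codebase):
# itertools.islice pulls upload_chunk_size documents at a time, keeping
# memory bounded even for very large feeds.
import itertools

def add_jobs_sketch(jobs, upload_chunk_size):
    job_ids = []
    jobs = iter(jobs)
    while True:
        chunk = list(itertools.islice(jobs, upload_chunk_size))
        if not chunk:
            break
        # A real implementation would post this batch to solr here.
        job_ids.extend(job['guid'] for job in chunk)
    return job_ids
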
def test_dates_have_timezones(self):
    """Assert that dates on imports can be resolved to specific UTC times."""
    with open(self.HRXML_DOC) as f:
        etree = lxml.etree.fromstring(f.read())
    result = transform.hr_xml_to_json(etree, self.bu)

    # Check date_updated
    date_updated = result['date_updated']
    # Assert it has a timezone
    self.assertIsNotNone(date_updated.tzinfo,
                         msg="The date_updated should have a "
                             "timezone associated with it.")
    # Assert the datetime is correct when converted to UTC.
    actual_utc = date_updated.astimezone(pytz.UTC)
    expected = datetime.datetime(2015, 12, 23, 6, 48, 11, 533000, pytz.UTC)
    self.assertEqual(
        actual_utc, expected,
        msg="date_updated is '%s', it should equal '%s'" % (
            actual_utc, expected.isoformat()))

    # Check date_new
    date_new = result['date_new']
    # Assert it has a timezone
    self.assertIsNotNone(date_new.tzinfo,
                         msg="The date_new should have a "
                             "timezone associated with it.")
    # Assert the datetime is correct when converted to UTC.
    actual_utc = date_new.astimezone(pytz.UTC)
    expected = datetime.datetime(2015, 11, 1, 5, 48, 11, 0, pytz.UTC)
    self.assertEqual(
        actual_utc, expected,
        msg="date_new is '%s', it should equal '%s'" % (
            actual_utc, expected.isoformat()))

def update_job_source(guid, buid, name):
    """Composed method for responding to a guid update."""
    assert re.match(r'^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$',
                    guid.upper()), \
        "%s is not a valid guid" % guid
    assert re.match(r'^\d+$', str(buid)), "%s is not a valid buid" % buid

    logger.info("Updating Job Source %s", guid)

    # Make the BusinessUnit and Company
    create_businessunit(buid)
    bu = BusinessUnit.objects.get(id=buid)
    bu.title = name
    bu.save()
    add_company(bu)

    # Look up the jobs, filter them, transform them, and then load them.
    zf = get_jobsfs_zipfile(guid)
    jobs = get_jobs_from_zipfile(zf, guid)
    jobs = filter_current_jobs(jobs, bu)
    jobs = (hr_xml_to_json(job, bu) for job in jobs)
    jobs = (add_redirect(job, bu) for job in jobs)

    # AT&T showed that large numbers of MOCs can cause import issues due to
    # the size of the documents. Therefore, lower the document chunk size
    # when processing AT&T.
    if int(buid) == 19389:
        logger.warn("AT&T has a large number of mapped_mocs, which causes "
                    "problems. Reducing chunk size.")
        upload_chunk_size = 64
    else:
        upload_chunk_size = 1024

    job_ids = add_jobs(jobs, upload_chunk_size)
    remove_expired_jobs(buid, job_ids)

    # Update business information
    bu.associated_jobs = len(job_ids)
    bu.date_updated = datetime.datetime.utcnow()
    bu.save()

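# Side note, as a standalone sketch: the guid checks above normalize the
# input with .upper() before matching, so lower-cased guids (as stored by
# jobsfs_to_mongo) still validate. GUID_RE and is_valid_guid are hypothetical
# helpers, not names from this codebase.
import re

GUID_RE = re.compile(
    r'^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$')

def is_valid_guid(guid):
    # Case-insensitive via upper-casing, mirroring the asserts above.
    return bool(GUID_RE.match(guid.upper()))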