Beispiel #1
0
def jobsfs_to_mongo(guid, buid, name):
    """Load the current jobs of one job source into the Mongo ``jobs`` collection.

    After validating the identifiers, the BusinessUnit for ``buid`` is
    created/fetched, retitled to ``name`` and handed to ``add_company``.
    The job source's zipfile is then read, filtered down to current jobs,
    and each job is converted with ``hr_xml_to_json``.  Every resulting
    document gets its ``guid`` lower-cased and is upserted (full replace)
    into ``connect_db().db.jobs`` keyed on that guid.

    :param guid: job source guid in 8-4-4-4-12 hexadecimal form (checked
        case-insensitively).
    :param buid: business unit id; ``str(buid)`` must consist of digits only.
    :param name: title stored on the BusinessUnit.
    :raises AssertionError: when ``guid`` or ``buid`` fails validation.
    """
    guid_pattern = r'^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$'
    assert re.match(guid_pattern, guid.upper()), "%s is not a valid guid" % guid
    assert re.match(r'^\d+$', str(buid)), "%s is not a valid buid" % buid

    logger.info("Updating Job Source %s", guid)

    # Set up the BusinessUnit and its Company.
    create_businessunit(buid)
    business_unit = BusinessUnit.objects.get(id=buid)
    business_unit.title = name
    business_unit.save()
    add_company(business_unit)

    # Extract the jobs, keep the current ones, and transform them.
    archive = get_jobsfs_zipfile(guid)
    current_jobs = filter_current_jobs(get_jobs_from_zipfile(archive, guid),
                                       business_unit)
    documents = [hr_xml_to_json(raw_job, business_unit)
                 for raw_job in current_jobs]
    for document in documents:
        document['guid'] = document['guid'].lower()

    if not documents:
        # No bulk operation is started when there is nothing to write.
        return

    # One unordered bulk operation: replace-or-insert each document by guid.
    bulk_op = connect_db().db.jobs.initialize_unordered_bulk_op()
    for document in documents:
        bulk_op.find({'guid': document['guid']}).upsert().replace_one(document)
    bulk_op.execute()
def update_job_source(guid, buid, name, clear_cache=False):
    """Re-import one job source and refresh its BusinessUnit bookkeeping.

    The BusinessUnit for ``buid`` is created/fetched, retitled to ``name``
    and passed to ``add_company``.  The job source's zipfile is read,
    filtered to current jobs and converted with ``hr_xml_to_json``; each
    converted job then receives a ``link`` built through ``make_redirect``.
    The jobs are handed to ``add_jobs`` and ``remove_expired_jobs``, after
    which the BusinessUnit's ``associated_jobs`` and ``date_updated`` are
    saved.

    :param guid: job source guid used to locate the zipfile.
    :param buid: business unit id.
    :param name: title stored on the BusinessUnit.
    :param clear_cache: when true, schedule ``task_clear_bu_cache`` afterwards.
    """
    logger.info("Updating Job Source %s", guid)

    # Set up the BusinessUnit and its Company.
    create_businessunit(buid)
    business_unit = BusinessUnit.objects.get(id=buid)
    business_unit.title = name
    business_unit.save()
    add_company(business_unit)

    # Extract the jobs, keep the current ones, and transform them all
    # before any link is generated.
    archive = get_jobsfs_zipfile(guid)
    current_jobs = filter_current_jobs(get_jobs_from_zipfile(archive, guid),
                                       business_unit)
    documents = []
    for raw_job in current_jobs:
        documents.append(hr_xml_to_json(raw_job, business_unit))

    for document in documents:
        redirect = make_redirect(document, business_unit)
        document['link'] = redirect.make_link()

    # Load the jobs, then drop whatever is no longer part of this import.
    add_jobs(documents)
    remove_expired_jobs(buid, documents)

    # Update business information
    business_unit.associated_jobs = len(documents)
    business_unit.date_updated = datetime.datetime.utcnow()
    business_unit.save()

    if not clear_cache:
        return

    # Clear cache in 25 minutes to allow for solr replication
    # NOTE(review): `countdown` is passed to `.delay()` as an ordinary keyword
    # argument; if this is a Celery task, confirm the task itself consumes it,
    # since Celery scheduling options are normally given to `apply_async`.
    tasks.task_clear_bu_cache.delay(buid=business_unit.id, countdown=1500)
Beispiel #3
0
def jobsfs_to_mongo(guid, buid, name):
    """Load the current jobs of one job source into the Mongo ``jobs`` collection.

    After validating the identifiers, the BusinessUnit for ``buid`` is
    created/fetched, retitled to ``name`` and handed to ``add_company``.
    The job source's zipfile is then read, filtered down to current jobs,
    and each job is converted with ``hr_xml_to_json``.  Every resulting
    document gets its ``guid`` lower-cased and is upserted (full replace)
    into ``connect_db().db.jobs`` keyed on that guid.

    :param guid: job source guid in 8-4-4-4-12 hexadecimal form (checked
        case-insensitively).
    :param buid: business unit id; ``str(buid)`` must consist of digits only.
    :param name: title stored on the BusinessUnit.
    :raises AssertionError: when ``guid`` or ``buid`` fails validation.
    """
    guid_pattern = r'^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$'
    assert re.match(guid_pattern, guid.upper()), "%s is not a valid guid" % guid
    assert re.match(r'^\d+$', str(buid)), "%s is not a valid buid" % buid

    logger.info("Updating Job Source %s", guid)

    # Set up the BusinessUnit and its Company.
    create_businessunit(buid)
    business_unit = BusinessUnit.objects.get(id=buid)
    business_unit.title = name
    business_unit.save()
    add_company(business_unit)

    # Extract the jobs, keep the current ones, and transform them.
    archive = get_jobsfs_zipfile(guid)
    current_jobs = filter_current_jobs(get_jobs_from_zipfile(archive, guid),
                                       business_unit)
    documents = [hr_xml_to_json(raw_job, business_unit)
                 for raw_job in current_jobs]
    for document in documents:
        document['guid'] = document['guid'].lower()

    if not documents:
        # No bulk operation is started when there is nothing to write.
        return

    # One unordered bulk operation: replace-or-insert each document by guid.
    bulk_op = connect_db().db.jobs.initialize_unordered_bulk_op()
    for document in documents:
        bulk_op.find({'guid': document['guid']}).upsert().replace_one(document)
    bulk_op.execute()
    def test_salted_date_is_based_on_date_new(self):
        """Assert the transformed job's 'salted_date' falls on 2016-07-02.

        The first fixture job is transformed with ``hr_xml_to_json`` and the
        calendar date of its ``salted_date`` is compared with the expected
        date (presumably the fixture's ``date_new`` -- confirm against the
        fixture document).
        """
        add_company(self.businessunit)

        transformed_job = hr_xml_to_json(self.jobs[0], self.businessunit)
        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and also parses as a Python 3 call.
        print("\nTRANSFORMED: %s\n" % transformed_job['guid'])

        expected = datetime.datetime.strptime("2016-07-02", "%Y-%m-%d").date()
        actual = transformed_job['salted_date'].date()

        self.assertEqual(expected, actual,
                         "'Salted_date' is expected to be the same date as date_new, it is not. %s is not %s" %
                             (actual, expected))
    def test_dates_have_timezones(self):
        """Assert that dates on imports can be resolved to specific UTC times.

        Parses the XML file at ``self.document``, transforms it with
        ``transform.hr_xml_to_json`` and checks that both ``date_updated`` and
        ``date_new`` are timezone-aware and equal
        2016-01-27 20:57:03.997 UTC once converted to UTC.
        """

        with open(self.document) as f:
            etree = lxml.etree.fromstring(f.read())

        result = transform.hr_xml_to_json(etree, self.bu)

        # Integer literals below are written without leading zeros: a leading
        # zero means octal in Python 2 and is a SyntaxError in Python 3.

        # Check date_updated
        date_updated = result['date_updated']

        # Assert it has a timezone
        self.assertIsNotNone(date_updated.tzinfo,
                             msg="The date_updated should have a "
                                 "timezone associated with it.")

        # Assert the datetime is correct when converted to UTC.
        actual_utc = date_updated.astimezone(pytz.UTC)
        expected = datetime.datetime(2016, 1, 27, 20, 57, 3, 997000,
                                     pytz.UTC)
        self.assertEqual(
            actual_utc,
            expected,
            msg="date_updated is '%s', it should equal '%s'" %
            (actual_utc, expected.isoformat()))

        # Check date_created
        date_new = result['date_new']

        # Assert it has a timezone
        self.assertIsNotNone(date_new.tzinfo,
                             msg="The date_new should have a "
                                 "timezone associated with it.")

        # Assert the datetime is correct when converted to UTC.
        actual_utc = date_new.astimezone(pytz.UTC)
        expected = datetime.datetime(2016, 1, 27, 20, 57, 3, 997000,
                                     pytz.UTC)
        self.assertEqual(actual_utc,
                         expected,
                         msg="date_new is '%s', it should equal '%s'" %
                         (actual_utc, expected.isoformat()))
Beispiel #6
0
def update_job_source(guid, buid, name, clear_cache=False):
    """Re-import one job source and refresh its BusinessUnit bookkeeping.

    After validating the identifiers, the BusinessUnit for ``buid`` is
    created/fetched, retitled to ``name`` and passed to ``add_company``.
    The job source's zipfile is read, filtered to current jobs, and each job
    is lazily converted with ``hr_xml_to_json`` and ``add_redirect`` before
    being uploaded in chunks by ``add_jobs``.  The ids returned by
    ``add_jobs`` are given to ``remove_expired_jobs`` and counted into the
    BusinessUnit's ``associated_jobs``.

    :param guid: job source guid in 8-4-4-4-12 hexadecimal form (checked
        case-insensitively).
    :param buid: business unit id; ``str(buid)`` must consist of digits only.
    :param name: title stored on the BusinessUnit.
    :param clear_cache: when true, schedule ``task_clear_bu_cache`` afterwards.
    :raises AssertionError: when ``guid`` or ``buid`` fails validation.
    """
    # These must be written as `assert cond, msg`.  The parenthesised form
    # `assert(cond, msg)` asserts a non-empty tuple, which is always true, so
    # the validation would never run.  The guid is upper-cased before
    # matching so lower-case guids remain accepted.
    assert re.match(r'^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$', guid.upper()), \
        "%s is not a valid guid" % guid
    assert re.match(r'^\d+$', str(buid)), "%s is not a valid buid" % buid

    logger.info("Updating Job Source %s", guid)
    # Make the BusinessUnit and Company
    create_businessunit(buid)
    bu = BusinessUnit.objects.get(id=buid)
    bu.title = name
    bu.save()
    add_company(bu)

    # Look up the jobs, filter them, transform them, and then load the jobs.
    # The two generator expressions keep the transformation lazy: nothing is
    # converted until add_jobs consumes the iterable.
    zf = get_jobsfs_zipfile(guid)
    jobs = get_jobs_from_zipfile(zf, guid)
    jobs = filter_current_jobs(jobs, bu)
    jobs = (hr_xml_to_json(job, bu) for job in jobs)
    jobs = (add_redirect(job, bu) for job in jobs)

    # AT&T Showed that large numbers of MOCs can cause import issues due to the size of documents.
    # Therefore, when processing AT&T lower the document chunk size.
    if int(buid) == 19389:
        logger.warn("AT&T has large amounts of mapped_mocs, that cause problems.  Reducing chunk size.")
        upload_chunk_size = 64
    else:
        upload_chunk_size = 1024

    job_ids = add_jobs(jobs, upload_chunk_size)
    remove_expired_jobs(buid, job_ids)

    # Update business information
    bu.associated_jobs = len(job_ids)
    bu.date_updated = datetime.datetime.utcnow()
    bu.save()
    if clear_cache:
        # Clear cache in 25 minutes to allow for solr replication
        tasks.task_clear_bu_cache.delay(buid=bu.id, countdown=1500)
Beispiel #7
0
    def test_dates_have_timezones(self):
        """Assert that dates on imports can be resolved to specific UTC times.

        Parses the XML file at ``self.HRXML_DOC``, transforms it with
        ``transform.hr_xml_to_json`` and checks that ``date_updated`` and
        ``date_new`` are timezone-aware and, converted to UTC, equal
        2015-12-23 06:48:11.533 and 2015-11-01 05:48:11 respectively.
        """

        with open(self.HRXML_DOC) as f:
            etree = lxml.etree.fromstring(f.read())

        result = transform.hr_xml_to_json(etree, self.bu)

        # Integer literals below are written without leading zeros: a leading
        # zero means octal in Python 2 and is a SyntaxError in Python 3.

        # Check date_updated
        date_updated = result['date_updated']

        # Assert it has a timezone
        self.assertIsNotNone(date_updated.tzinfo,
                             msg="The date_updated should have a "
                                 "timezone associated with it.")

        # Assert the datetime is correct when converted to UTC.
        actual_utc = date_updated.astimezone(pytz.UTC)
        expected = datetime.datetime(2015, 12, 23, 6, 48, 11, 533000, pytz.UTC)
        self.assertEqual(actual_utc, expected,
            msg="date_updated is '%s', it should equal '%s'" % (
                actual_utc,
                expected.isoformat()))

        # Check date_created
        date_new = result['date_new']

        # Assert it has a timezone
        self.assertIsNotNone(date_new.tzinfo,
                             msg="The date_new should have a "
                                 "timezone associated with it.")

        # Assert the datetime is correct when converted to UTC.
        actual_utc = date_new.astimezone(pytz.UTC)
        expected = datetime.datetime(2015, 11, 1, 5, 48, 11, 0, pytz.UTC)
        self.assertEqual(actual_utc, expected,
            msg="date_new is '%s', it should equal '%s'" % (
                actual_utc,
                expected.isoformat()))
Beispiel #8
0
def update_job_source(guid, buid, name):
    """Re-import one job source and refresh its BusinessUnit bookkeeping.

    After validating the identifiers, the BusinessUnit for ``buid`` is
    created/fetched, retitled to ``name`` and passed to ``add_company``.
    The job source's zipfile is read, filtered to current jobs, and each job
    is lazily converted with ``hr_xml_to_json`` and ``add_redirect`` before
    being uploaded in chunks by ``add_jobs``.  The ids returned by
    ``add_jobs`` are given to ``remove_expired_jobs`` and counted into the
    BusinessUnit's ``associated_jobs``.

    :param guid: job source guid in 8-4-4-4-12 hexadecimal form (checked
        case-insensitively).
    :param buid: business unit id; ``str(buid)`` must consist of digits only.
    :param name: title stored on the BusinessUnit.
    :raises AssertionError: when ``guid`` or ``buid`` fails validation.
    """
    guid_pattern = r'^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$'
    assert re.match(guid_pattern, guid.upper()), "%s is not a valid guid" % guid
    assert re.match(r'^\d+$', str(buid)), "%s is not a valid buid" % buid

    logger.info("Updating Job Source %s", guid)

    # Set up the BusinessUnit and its Company.
    create_businessunit(buid)
    business_unit = BusinessUnit.objects.get(id=buid)
    business_unit.title = name
    business_unit.save()
    add_company(business_unit)

    # Extract and filter eagerly; transform lazily so that each job is only
    # converted (and given its redirect) when add_jobs pulls it.
    archive = get_jobsfs_zipfile(guid)
    current_jobs = filter_current_jobs(get_jobs_from_zipfile(archive, guid),
                                       business_unit)
    documents = (
        add_redirect(hr_xml_to_json(raw_job, business_unit), business_unit)
        for raw_job in current_jobs
    )

    # AT&T showed that large numbers of MOCs can cause import issues due to
    # the size of documents, so its uploads use a smaller chunk size.
    is_att = int(buid) == 19389
    if is_att:
        logger.warn(
            "AT&T has large amounts of mapped_mocs, that cause problems.  Reducing chunk size."
        )
    upload_chunk_size = 64 if is_att else 1024

    job_ids = add_jobs(documents, upload_chunk_size)
    remove_expired_jobs(buid, job_ids)

    # Update business information
    business_unit.associated_jobs = len(job_ids)
    business_unit.date_updated = datetime.datetime.utcnow()
    business_unit.save()