Ejemplo n.º 1
0
def cycle():
    """Poll every queue in ``qs`` forever and spawn one worker process per job.

    A queue is polled when its ``is_popped`` flag is set or when more than two
    seconds have passed since its ``last_check_at``.  Jobs in ``JOB_COMMIT``
    status (not deleted, matching the queue name) are read oldest first; while
    the queue's semaphore reports a free slot, each job is marked
    ``JOB_PROCESSING``, a slot is acquired and ``worker`` is started in a new
    ``Process`` with the job id and the semaphore.

    This function never returns.
    """
    # NOTE(review): ``last_check_at`` is not refreshed anywhere in this
    # function -- confirm it is updated elsewhere, otherwise the 2-second
    # throttle below is always satisfied once the first 2 seconds have passed.
    while True:
        is_popped = False
        for q in qs:
            if q.is_popped or time.time() - q.last_check_at > 2:
                try:
                    jobs = Job.objects(
                        status=JOB_COMMIT, deleted_at=None,
                        queue_type=q.name).order_by('created_at').all()
                    q.is_popped = False
                    for job in jobs:
                        if q.semaphore.get_value() > 0:
                            # Mark the job before starting the worker so the
                            # next poll does not select it again.
                            job.status = JOB_PROCESSING
                            job.save()

                            job_id = job.id
                            q.semaphore.acquire()
                            p = Process(target=worker,
                                        args=(job_id, q.semaphore))
                            p.start()

                            q.is_popped = True
                        else:
                            # No free slot: stop dispatching for this queue.
                            q.is_popped = False
                            break
                # "as" form and print() are valid on both Python 2.6+ and 3.
                except Exception as ex:
                    time.sleep(1)
                    print(ex)

                is_popped |= q.is_popped

        # No queue dispatched a job on this pass: back off before polling again.
        if not is_popped:
            time.sleep(2)
Ejemplo n.º 2
0
    def setUp(self):
        """Import one job for a new employer and verify what was stored.

        Leaves ``self.employer_name`` and ``self.job_title`` (lower-cased, as
        the assertions below expect them to be stored), ``self.employer`` and
        ``self.job`` on the test case.
        """
        self.employer_name = 'Test Employer 1'
        self.job_title = 'Test Job Title 1'
        now = datetime.now()
        term = Term.get_term(now.month)
        location = 'Toronto'
        job_levels = ['Junior', 'Intermediate', 'Senior']
        openings = 10
        applicants = 50
        summary = datamanager.test_summary
        programs = ['MATH-Computer Science', 'ENG-Computer', 'ENG-Civil']
        job_url = 'https://testurl.com'

        importer.import_job(employer_name=self.employer_name, job_title=self.job_title, term=term,
                            location=location, levels=job_levels, openings=openings, applicants=applicants,
                            summary=summary, date=now, programs=programs, url=job_url)

        # Names and location are compared in lower case from here on
        self.employer_name = self.employer_name.lower()
        self.job_title = self.job_title.lower()
        location = location.lower()

        self.employer = Employer.objects(name=self.employer_name).no_dereference().first()

        self.job = Job.objects(id__in=[job.id for job in self.employer.jobs], title=self.job_title).first()

        # assertEqual (rather than assertTrue(a == b)) reports both values on failure
        self.assertEqual(self.employer.name, self.employer_name)
        self.assertEqual(self.employer.overall.rating, 0.0)
        self.assertEqual(self.employer.overall.count, 0)
        self.assertEqual(len(self.employer.warnings), 0)
        self.assertEqual(len(self.employer.comments), 0)

        self.assertEqual(self.job.title, self.job_title)
        self.assertEqual(self.job.url, job_url)
        self.assertEqual(self.job.term, term)
        self.assertEqual(self.job.location[0].name, location)
        # NOTE(review): presumably the coordinates are filled in by geocoding the
        # location name -- confirm where longitude/latitude are populated
        self.assertEqual(int(round(self.job.location[0].longitude)), -79)
        self.assertEqual(int(round(self.job.location[0].latitude)), 44)
        self.assertEqual(self.job.openings, openings)
        self.assertEqual(self.job.remaining, openings)
        self.assertEqual(self.job.hire_rate.rating, 0.0)
        self.assertEqual(self.job.hire_rate.count, 0)
        self.assertEqual(self.job.applicants[0].applicants, applicants)
        self.assertEqual(self.job.applicants[0].date.year, now.year)
        self.assertEqual(self.job.applicants[0].date.month, now.month)
        self.assertEqual(self.job.applicants[0].date.day, now.day)
        self.assertEqual(set(self.job.levels), set(job_levels))
        self.assertEqual(len(self.job.comments), 0)
        self.assertEqual(set(self.job.programs), set(programs))
        self.assertFalse(self.job.deprecated)
Ejemplo n.º 3
0
def worker(job_id, semaphore):
    """Run the callable stored on one job and record the outcome on the job.

    Arguments:
    job_id -- id of the Job document to execute
    semaphore -- semaphore slot held for this job; it is released when the
                 worker finishes, whether the job succeeded or failed

    The job names a module and function, resolved through
    ``get_function_from_module_and_function_name``, and carries JSON-encoded
    arguments with optional ``args`` and ``kwargs`` entries.  On success
    ``job.finish()`` is called; on any exception ``job.exception()`` is called
    and the formatted traceback is stored in ``job.exceptions``.
    """
    try:
        job = Job.objects(id=job_id).first()
        try:
            func = get_function_from_module_and_function_name(
                job.module_name, job.function_name)
            arguments = json.loads(job.arguments)
            # A job stored without positional/keyword arguments must still be
            # callable: unpacking None would raise TypeError.
            args = arguments.get('args') or []
            kwargs = arguments.get('kwargs') or {}

            func(*args, **kwargs)
            job.finish()
        except Exception:
            exc = traceback.format_exc()
            job.exception()
            # NOTE(review): nothing in this block saves the job after this
            # assignment -- confirm the traceback is persisted elsewhere.
            job.exceptions = exc
    finally:
        # The dispatcher acquires a slot before starting this worker; without
        # this release the queue would run out of slots after a few jobs.
        semaphore.release()
Ejemplo n.º 4
0
def comment(job_id):
    """Attach a user-submitted comment to a job.

    Reads a JSON request body with the keys ``text``, ``date``, ``salary`` and
    ``rating``.  The rating is divided by 5 before it is stored.

    Arguments:
    job_id -- id of the job the comment belongs to

    Returns the rendered ``404.html`` page with status 404 when the job does
    not exist, otherwise a JSON ``{"success": true}`` response with status 200.
    Nothing is stored when ``job_id`` is None or the comment text is empty.
    """
    data = json.loads(flask.request.data)

    comment_text = data['text']
    comment_date = dateutil.parser.parse(data['date'])
    salary = data['salary'] or 0
    rating = (float(data['rating']) / 5) or 0

    if job_id is not None and comment_text:
        job = Job.objects(id=job_id).first()

        if not job:
            # Send a real 404 status, not the default 200, with the error page
            return render_template('404.html'), 404

        logger.info(COMPONENT, 'Adding comment for job: {}'.format(job_id))

        new_comment = Comment(comment=comment_text, date=comment_date, salary=salary, crawled=False,
                              rating=AggregateRating(rating=rating, count=1))

        job.update(push__comments=new_comment)

    # 'Content-Type' is the actual HTTP header name
    return json.dumps({'success': True}), 200, {'Content-Type': 'application/json'}
Ejemplo n.º 5
0
def import_comment(**kwargs):
    """Import comment from RateMyCoopJob.

    Each comment is attached to the matching job when the employer has a job
    with the given title, otherwise to the employer itself.  Comments that
    already exist on the target are skipped.  Nothing is imported when the
    employer cannot be found.

    Keyword arguments:
    employer_name -- Employer name
    job_title -- Title of job
    comments: -- Array of comments
        comment -- Comment
        comment_date -- Date comment was submitted. Note: in non-standard form such as: 5 years ago, 3 weeks ago etc
        salary -- Job salary (hourly)
        rating -- Job rating out of 5 (1 - 5 stars on ratemycoopjob)
    """

    employer_name = kwargs['employer_name'].lower()

    job_title = kwargs['job_title'].lower()

    # If employer alias exists (ex. Research in motion -> Blackberry), use instead
    if employer_name in employer_alias.aliases:
        employer_name = employer_alias.aliases[employer_name].lower()

    # Look the employer up once (quoted, so the text search matches the exact phrase).
    # A single query avoids a second round trip and a None employer if it disappears in between
    employer = Employer.objects.search_text("\"{}\"".format(employer_name)).no_dereference().first()

    # If employer does not exist
    if employer is None:
        logger.info(COMPONENT, 'Employer: {} does not exist, ignoring..'.format(employer_name))
        return

    logger.info(COMPONENT, 'Importing comments for job: {} from employer: {}'.format(job_title, employer_name))

    # Iterate through all comments
    for index, comment_obj in enumerate(kwargs['comments']):

        comment = comment_obj['comment']

        comment_date = _get_comment_date(comment_obj['comment_date'])

        salary = float(comment_obj['salary'])

        # Ratings arrive on a 1-5 scale and are stored as a fraction of 5
        rating = float(comment_obj['rating']) / 5

        # If job does not exist add to employer
        if not employer.job_exists(job_title):
            if employer.comment_exists(comment=comment, date=comment_date, salary=salary, rating=rating):
                logger.info(COMPONENT, 'Comment: {} already exists for employer: {}, ignoring'
                            .format(index, employer_name))

            else:
                logger.info(COMPONENT, 'Adding comment: {} to employer: {}'.format(index, employer_name))

                new_comment = Comment(comment=comment, date=comment_date, salary=salary, crawled=True,
                                      rating=AggregateRating(rating=rating, count=1))

                employer.update(push__comments=new_comment)

        # Job already exists
        else:
            # Fetched again for every comment so comments pushed earlier in this loop are visible
            job = Job.objects(id__in=[job.id for job in employer.jobs], title=job_title).first()

            if job.comment_exists(comment=comment, date=comment_date, salary=salary, rating=rating):
                logger.info(COMPONENT, 'Comment: {} already exists for job: {} for employer: {}, ignoring'
                            .format(index, job_title, employer_name))

            else:
                logger.info(COMPONENT, 'Adding comment: {} for job: {} from {}'.format(index, job_title, employer_name))

                new_comment = Comment(comment=comment, date=comment_date, salary=salary, crawled=True,
                                      rating=AggregateRating(rating=rating, count=1))

                job.update(push__comments=new_comment)
Ejemplo n.º 6
0
def import_job(**kwargs):
    """Import job.

    Creates the employer and/or the job when they do not exist yet.  For an
    existing job: a changed summary deprecates it and creates a replacement,
    a different year/term rolls it over (recording the previous hire ratio),
    and otherwise it is refreshed for the current term.

    Keyword arguments:
    employer_name -- Employer name
    job_title -- Title of job
    summary -- Job summary
    term -- Term job was advertised [Fall, Winter, Spring]
    location -- Location job was advertised
    openings -- Number of job openings
    remaining -- Number of job openings remaining (Optional, defaults to openings)
    applicants -- Number of applicants job has (Optional)
    levels -- Levels job is intended for [Junior, Intermediate, Senior]
    programs -- Programs the job is specified for
    url -- URL of job
    date -- Date job was crawled (useful for knowing exactly # of applicants at what time).
            The advertised year is taken from date.year; no separate year argument is read
    index -- Boolean to indicate whether to index or not (default False)

    Raises DataIntegrityError when the existing job has a later year than the imported one.
    """

    employer_name = kwargs['employer_name'].lower()

    job_title = kwargs['job_title'].lower()

    term = kwargs['term']

    levels = []

    for level in kwargs['levels']:
        uw_level = Term.get_level(level)
        if uw_level:
            levels.append(uw_level)
        else:
            logger.error(COMPONENT, 'Error processing level: {}'.format(level))

    programs = []

    for program in kwargs['programs']:
        uw_program = Program.get_program(program)
        if uw_program:
            programs.append(uw_program)
        else:
            logger.error(COMPONENT, 'Error processing program: {}'.format(program))

    location = kwargs['location'].lower()

    openings = int(kwargs['openings'])

    remaining = int(kwargs['remaining']) if 'remaining' in kwargs else openings

    filtered_summary = engine.filter_summary(kwargs['summary'])

    summary_keywords = engine.get_keywords(filtered_summary, programs)

    date = kwargs['date']

    year = date.year

    url = kwargs['url']

    # Applicants are optional: missing, empty or unparsable values count as zero
    try:
        applicants = int(kwargs.get('applicants') or 0)
    except (TypeError, ValueError, OverflowError):
        applicants = 0

    # Look the flag up by its key. The previous `index in kwargs` tested the key False,
    # which never matches, so a caller's index=True was silently ignored
    index = kwargs.get('index', False)

    def create_job():
        """Build, save and reload a new Job from the values parsed above."""
        keywords = [Keyword(keyword=k['keyword'], types=k['types']) for k in summary_keywords]

        created = Job(title=job_title, summary=filtered_summary, year=year,
                      term=term, location=[Location(name=location)], openings=openings, remaining=remaining,
                      applicants=[Applicant(applicants=applicants, date=date)], levels=levels,
                      programs=programs, url=url, keywords=keywords)

        created.save()
        created.reload()

        return created

    logger.info(COMPONENT, 'Importing job: {} from {}'.format(job_title, employer_name))

    # If employer does not exist, create it
    if not Employer.employer_exists(employer_name):
        logger.info(COMPONENT, 'Employer: {} does not exist, creating..'.format(employer_name))

        employer = Employer(name=employer_name)

        logger.info(COMPONENT, 'Creating job: {}'.format(job_title))

        # New job so number of remaining positions is same as openings
        job = create_job()

        employer.jobs.append(job)
        employer.save()
        employer.reload()

        if index:
            elastic.index_employer_waterlooworks(employer)
            elastic.index_job_waterlooworks(employer, job)

        return

    # Employer already exists
    employer = Employer.objects(name=employer_name).no_dereference().first()

    logger.info(COMPONENT, 'Employer: {} already exists'.format(employer_name))

    # If job does not exist, create it
    if not employer.job_exists(job_title):
        logger.info(COMPONENT, 'Creating job: {}'.format(job_title))

        # New job so number of remaining positions is same as openings
        job = create_job()

        employer.update(push__jobs=job)

        if index:
            elastic.update_employer_waterlooworks(employer)
            elastic.index_job_waterlooworks(employer, job)

        return

    # Job already exists
    logger.info(COMPONENT, 'Job: {} already exists'.format(job_title))

    job = Job.objects(id__in=[job.id for job in employer.jobs], title=job_title).first()

    if year < job.year:
        raise DataIntegrityError('Job: {} by {} cannot be advertised before {}'
                                 .format(job_title, employer_name, job.year))

    # Compare summaries ignoring case and every non-word character
    filtered_summary_compare = re.sub(r'\W+', '', filtered_summary.lower().strip()).strip()
    job_summary_compare = re.sub(r'\W+', '', job.summary.lower().strip()).strip()

    # Job summary is not the same. In this case the employer most likely changed the job
    if filtered_summary_compare != job_summary_compare:

        if openings >= 1:
            logger.info(COMPONENT, 'Job: {}: different summary detected, deprecating and creating new job..'
                        .format(job_title))

            job.update(set__deprecated=True)

            # Assume new job so number of remaining positions is same as openings
            new_job = create_job()

            employer.update(push__jobs=new_job)

            if index:
                elastic.delete_employer_waterlooworks(employer)
                elastic.delete_job_waterlooworks(employer, job)
                elastic.index_employer_waterlooworks(employer)
                elastic.index_job_waterlooworks(employer, new_job)
        else:
            logger.info(COMPONENT, 'Job: {}: different summary detected but invalid openings: {}, ignoring..'
                        .format(job_title, openings))

    # Job is the same (same title and description) and is being advertised in new term
    elif year != job.year or term != job.term:
        logger.info(COMPONENT, 'Job: {}: being advertised in new term, updating..'.format(job_title))

        # Add hire ratio for previous term. A job stored with zero openings has no
        # ratio to record (and would otherwise divide by zero)
        if job.openings:
            job.hire_rate.add_rating(float(job.openings - job.remaining) / job.openings)

        hire_rate = AggregateRating(rating=job.hire_rate.rating, count=job.hire_rate.count)

        job.update(set__year=year, set__term=term, add_to_set__location=Location(name=location),
                   set__openings=openings, set__remaining=remaining,
                   push__applicants=Applicant(applicants=applicants, date=date), set__hire_rate=hire_rate,
                   set__levels=levels, set__programs=programs, set__url=url, set__last_indexed=datetime.now())

        if index:
            elastic.update_job_waterlooworks(employer, job)

    # Job is being updated. We need to update location, openings, levels, remaining, hire_rate, applicants
    else:
        logger.info(COMPONENT, 'Job: {}: updating for current term'.format(job_title))

        # Job posting has decreased, some positions filled up; otherwise keep the stored value
        remaining = openings if openings < job.remaining else job.remaining

        job.update(add_to_set__location=Location(name=location), set__remaining=remaining,
                   set__levels=list(set(levels + job.levels)),
                   push__applicants=Applicant(applicants=applicants, date=date),
                   set__programs=list(set(programs + job.programs)), set__url=url,
                   set__last_indexed=datetime.now())

        if index:
            elastic.update_job_waterlooworks(employer, job)
Ejemplo n.º 7
0
def update_job(**kwargs):
    """Update job.

    When the filtered summary differs from the stored one, the job is
    deprecated and a replacement is created (only if there is at least one
    opening); otherwise the stored job is refreshed in place.

    Keyword arguments:
    id -- Job ID
    summary -- Job summary
    location -- Location job was advertised
    programs -- Programs the job is specified for
    levels -- Levels job is intended for [Junior, Intermediate, Senior]
    openings -- Number of job openings (missing, empty or unparsable values count as 0)
    index -- Boolean to indicate whether to index or not (default False)
    """

    summary = kwargs['summary']

    location = kwargs['location'].lower()

    levels = kwargs['levels']

    programs = []

    for program in kwargs['programs']:
        uw_program = Program.get_program(program)
        if uw_program:
            programs.append(uw_program)
        else:
            logger.error(COMPONENT, 'Error processing program: {}'.format(program))

    # Only swallow the errors a bad openings value can produce
    try:
        openings = int(kwargs.get('openings') or 0)
    except (TypeError, ValueError, OverflowError):
        openings = 0

    # Look the flag up by its key. The previous `index in kwargs` tested the key False,
    # which never matches, so a caller's index=True was silently ignored
    index = kwargs.get('index', False)

    job = Job.objects(id=kwargs['id']).first()

    # Job posting has decreased, some positions filled up
    # NOTE(review): this is capped by job.openings, not job.remaining -- confirm that is intended
    remaining = min(openings, job.openings)

    filtered_summary = engine.filter_summary(summary)

    summary_keywords = engine.get_keywords(filtered_summary, programs)

    # Compare summaries ignoring case and every non-word character
    filtered_summary_compare = re.sub(r'\W+', '', filtered_summary.lower().strip()).strip()
    job_summary_compare = re.sub(r'\W+', '', job.summary.lower().strip()).strip()

    employer = Employer.objects(jobs=kwargs['id']).first()

    # Job summary is not the same. In this case the employer most likely changed the job
    if filtered_summary_compare != job_summary_compare:

        if openings >= 1:
            logger.info(COMPONENT, 'Job: {}: different summary detected, deprecating and creating new job..'
                        .format(kwargs['id']))

            job.update(set__deprecated=True)

            keywords = [Keyword(keyword=k['keyword'], types=k['types']) for k in summary_keywords]

            # Assume new job so number of remaining positions is same as openings
            new_job = Job(title=job.title, summary=filtered_summary, year=job.year, term=job.term,
                          location=[Location(name=location)], openings=openings, remaining=openings,
                          levels=levels, programs=programs, url=job.url, keywords=keywords)

            new_job.save()

            employer.update(push__jobs=new_job)

            if index:
                elastic.delete_employer_waterlooworks(employer)
                elastic.delete_job_waterlooworks(employer, job)
                elastic.index_employer_waterlooworks(employer)
                elastic.index_job_waterlooworks(employer, new_job)
        else:
            logger.info(COMPONENT, 'Job: {}: different summary detected but invalid openings: {}, ignoring..'
                        .format(job.title, openings))
    else:
        logger.info(COMPONENT, 'Job: {}: updating for current term'.format(kwargs['id']))

        # Levels and programs are merged with the stored ones, never removed
        job.update(add_to_set__location=Location(name=location), set__remaining=remaining,
                   set__levels=list(set(levels + job.levels)),
                   set__programs=list(set(programs + job.programs)), set__last_indexed=datetime.now())

        if index:
            elastic.update_job_waterlooworks(employer, job)
Ejemplo n.º 8
0
def import_comment(**kwargs):
    """Import comment from RateMyCoopJob.

    Each comment is attached to the matching job when the employer has a job
    with the given title, otherwise to the employer itself.  Comments that
    already exist on the target are skipped.  Nothing is imported when the
    employer cannot be found.

    Keyword arguments:
    employer_name -- Employer name
    job_title -- Title of job
    comments: -- Array of comments
        comment -- Comment
        comment_date -- Date comment was submitted. Note: in non-standard form such as: 5 years ago, 3 weeks ago etc
        salary -- Job salary (hourly)
        rating -- Job rating out of 5 (1 - 5 stars on ratemycoopjob)
    """

    employer_name = kwargs['employer_name'].lower()

    job_title = kwargs['job_title'].lower()

    # If employer alias exists (ex. Research in motion -> Blackberry), use instead
    if employer_name in employer_alias.aliases:
        employer_name = employer_alias.aliases[employer_name].lower()

    # Look the employer up once (quoted, so the text search matches the exact
    # phrase). A single query avoids a second round trip and a None employer
    # if it disappears between the two lookups
    employer = Employer.objects.search_text(
        "\"{}\"".format(employer_name)).no_dereference().first()

    # If employer does not exist
    if employer is None:
        logger.info(
            COMPONENT,
            'Employer: {} does not exist, ignoring..'.format(employer_name))
        return

    logger.info(
        COMPONENT, 'Importing comments for job: {} from employer: {}'.format(
            job_title, employer_name))

    # Iterate through all comments
    for index, comment_obj in enumerate(kwargs['comments']):

        comment = comment_obj['comment']

        comment_date = _get_comment_date(comment_obj['comment_date'])

        salary = float(comment_obj['salary'])

        # Ratings arrive on a 1-5 scale and are stored as a fraction of 5
        rating = float(comment_obj['rating']) / 5

        # If job does not exist add to employer
        if not employer.job_exists(job_title):
            if employer.comment_exists(comment=comment,
                                       date=comment_date,
                                       salary=salary,
                                       rating=rating):
                logger.info(
                    COMPONENT,
                    'Comment: {} already exists for employer: {}, ignoring'.
                    format(index, employer_name))

            else:
                logger.info(
                    COMPONENT, 'Adding comment: {} to employer: {}'.format(
                        index, employer_name))

                new_comment = Comment(comment=comment,
                                      date=comment_date,
                                      salary=salary,
                                      crawled=True,
                                      rating=AggregateRating(rating=rating,
                                                             count=1))

                employer.update(push__comments=new_comment)

        # Job already exists
        else:
            # Fetched again for every comment so comments pushed earlier in
            # this loop are visible to the duplicate check
            job = Job.objects(id__in=[job.id for job in employer.jobs],
                              title=job_title).first()

            if job.comment_exists(comment=comment,
                                  date=comment_date,
                                  salary=salary,
                                  rating=rating):
                logger.info(
                    COMPONENT,
                    'Comment: {} already exists for job: {} for employer: {}, ignoring'
                    .format(index, job_title, employer_name))

            else:
                logger.info(
                    COMPONENT, 'Adding comment: {} for job: {} from {}'.format(
                        index, job_title, employer_name))

                new_comment = Comment(comment=comment,
                                      date=comment_date,
                                      salary=salary,
                                      crawled=True,
                                      rating=AggregateRating(rating=rating,
                                                             count=1))

                job.update(push__comments=new_comment)
Ejemplo n.º 9
0
def update_job(**kwargs):
    """Update job.

    When the filtered summary differs from the stored one, the job is
    deprecated and a replacement is created (only if there is at least one
    opening); otherwise the stored job is refreshed in place.

    Keyword arguments:
    id -- Job ID
    summary -- Job summary
    location -- Location job was advertised
    programs -- Programs the job is specified for
    levels -- Levels job is intended for [Junior, Intermediate, Senior]
    openings -- Number of job openings (missing, empty or unparsable values count as 0)
    index -- Boolean to indicate whether to index or not (default False)
    """

    summary = kwargs['summary']

    location = kwargs['location'].lower()

    levels = kwargs['levels']

    programs = []

    for program in kwargs['programs']:
        uw_program = Program.get_program(program)
        if uw_program:
            programs.append(uw_program)
        else:
            logger.error(COMPONENT,
                         'Error processing program: {}'.format(program))

    # Only swallow the errors a bad openings value can produce
    try:
        openings = int(kwargs.get('openings') or 0)
    except (TypeError, ValueError, OverflowError):
        openings = 0

    # Look the flag up by its key. The previous `index in kwargs` tested the
    # key False, which never matches, so a caller's index=True was ignored
    index = kwargs.get('index', False)

    job = Job.objects(id=kwargs['id']).first()

    # Job posting has decreased, some positions filled up
    # NOTE(review): this is capped by job.openings, not job.remaining --
    # confirm that is intended
    remaining = min(openings, job.openings)

    filtered_summary = engine.filter_summary(summary)

    summary_keywords = engine.get_keywords(filtered_summary, programs)

    # Compare summaries ignoring case and every non-word character
    filtered_summary_compare = re.sub(
        r'\W+', '',
        filtered_summary.lower().strip()).strip()
    job_summary_compare = re.sub(r'\W+', '',
                                 job.summary.lower().strip()).strip()

    employer = Employer.objects(jobs=kwargs['id']).first()

    # Job summary is not the same. In this case the employer most likely changed the job
    if filtered_summary_compare != job_summary_compare:

        if openings >= 1:
            logger.info(
                COMPONENT,
                'Job: {}: different summary detected, deprecating and creating new job..'
                .format(kwargs['id']))

            job.update(set__deprecated=True)

            keywords = [
                Keyword(keyword=k['keyword'], types=k['types'])
                for k in summary_keywords
            ]

            # Assume new job so number of remaining positions is same as openings
            new_job = Job(title=job.title,
                          summary=filtered_summary,
                          year=job.year,
                          term=job.term,
                          location=[Location(name=location)],
                          openings=openings,
                          remaining=openings,
                          levels=levels,
                          programs=programs,
                          url=job.url,
                          keywords=keywords)

            new_job.save()

            employer.update(push__jobs=new_job)

            if index:
                elastic.delete_employer_waterlooworks(employer)
                elastic.delete_job_waterlooworks(employer, job)
                elastic.index_employer_waterlooworks(employer)
                elastic.index_job_waterlooworks(employer, new_job)
        else:
            logger.info(
                COMPONENT,
                'Job: {}: different summary detected but invalid openings: {}, ignoring..'
                .format(job.title, openings))
    else:
        logger.info(COMPONENT,
                    'Job: {}: updating for current term'.format(kwargs['id']))

        # Levels and programs are merged with the stored ones, never removed
        job.update(add_to_set__location=Location(name=location),
                   set__remaining=remaining,
                   set__levels=list(set(levels + job.levels)),
                   set__programs=list(set(programs + job.programs)),
                   set__last_indexed=datetime.now())

        if index:
            elastic.update_job_waterlooworks(employer, job)
Ejemplo n.º 10
0
def import_job(**kwargs):
    """Import a crawled job posting, creating or updating employer/job records.

    Depending on what already exists, this either creates the employer and
    job, adds a new job to an existing employer, deprecates a job whose
    summary changed and creates a replacement, rolls an unchanged job over to
    a new term, or refreshes an unchanged job within the current term.

    Keyword arguments:
    employer_name -- Employer name
    job_title -- Title of job
    summary -- Job summary
    term -- Term job was advertised [Fall, Winter, Spring]
    location -- Location job was advertised
    openings -- Number of job openings
    remaining -- Number of job openings remaining (Optional, defaults to openings)
    applicants -- Number of applicants job has (Optional, defaults to 0)
    levels -- Levels job is intended for [Junior, Intermediate, Senior]
    programs -- Programs the job is specified for
    url -- URL of job
    date -- Date job was crawled (useful for knowing exactly # of applicants at what time);
            the advertised year is taken from date.year
    index -- Boolean to indicate whether to index in Elasticsearch or not
             (default False, which is what omitting the flag has always done)

    Raises:
    KeyError -- if a required keyword argument is missing
    DataIntegrityError -- if the job already exists with a later year than date.year
    """

    employer_name = kwargs['employer_name'].lower()

    job_title = kwargs['job_title'].lower()

    term = kwargs['term']

    # Levels/programs that cannot be mapped are logged and dropped rather
    # than aborting the whole import.
    levels = []

    for level in kwargs['levels']:
        uw_level = Term.get_level(level)
        if uw_level:
            levels.append(uw_level)
        else:
            logger.error(COMPONENT, 'Error processing level: {}'.format(level))

    programs = []

    for program in kwargs['programs']:
        uw_program = Program.get_program(program)
        if uw_program:
            programs.append(uw_program)
        else:
            logger.error(COMPONENT,
                         'Error processing program: {}'.format(program))

    location_name = kwargs['location'].lower()

    openings = int(kwargs['openings'])

    remaining = int(kwargs['remaining']) if 'remaining' in kwargs else openings

    summary = kwargs['summary']

    filtered_summary = engine.filter_summary(summary)

    summary_keywords = engine.get_keywords(filtered_summary, programs)

    date = kwargs['date']

    year = date.year

    url = kwargs['url']

    # Applicant count is optional and best-effort: a missing, empty or
    # unparsable value falls back to 0, but unparsable values are reported.
    applicants = 0
    raw_applicants = kwargs.get('applicants')

    if raw_applicants:
        try:
            applicants = int(raw_applicants)
        except (TypeError, ValueError, OverflowError):
            logger.error(
                COMPONENT,
                'Error processing applicants: {}'.format(raw_applicants))

    # Look the flag up by its *name*; testing the value itself as a key
    # (``False in kwargs``) can never match, which silently disabled indexing.
    index = kwargs.get('index', False)

    def create_job():
        """Build, save and reload a Job document from the parsed posting."""
        created = Job(title=job_title,
                      summary=filtered_summary,
                      year=year,
                      term=term,
                      location=[Location(name=location_name)],
                      openings=openings,
                      remaining=remaining,
                      applicants=[Applicant(applicants=applicants, date=date)],
                      levels=levels,
                      programs=programs,
                      url=url,
                      keywords=[
                          Keyword(keyword=k['keyword'], types=k['types'])
                          for k in summary_keywords
                      ])
        created.save()
        created.reload()
        return created

    def comparable(text):
        """Reduce a summary to lowercase word characters for comparison."""
        return re.sub(r'\W+', '', text.lower().strip()).strip()

    logger.info(COMPONENT,
                'Importing job: {} from {}'.format(job_title, employer_name))

    # If employer does not exist, create it together with the job
    if not Employer.employer_exists(employer_name):
        logger.info(
            COMPONENT,
            'Employer: {} does not exist, creating..'.format(employer_name))

        employer = Employer(name=employer_name)

        logger.info(COMPONENT, 'Creating job: {}'.format(job_title))

        job = create_job()

        employer.jobs.append(job)
        employer.save()
        employer.reload()

        if index:
            elastic.index_employer_waterlooworks(employer)
            elastic.index_job_waterlooworks(employer, job)

        return

    # Employer already exists
    employer = Employer.objects(name=employer_name).no_dereference().first()

    logger.info(COMPONENT, 'Employer: {} already exists'.format(employer_name))

    # If job does not exist, create it and attach it to the employer
    if not employer.job_exists(job_title):
        logger.info(COMPONENT, 'Creating job: {}'.format(job_title))

        job = create_job()

        employer.update(push__jobs=job)

        if index:
            elastic.update_employer_waterlooworks(employer)
            elastic.index_job_waterlooworks(employer, job)

        return

    # Job already exists
    logger.info(COMPONENT, 'Job: {} already exists'.format(job_title))

    # NOTE(review): this lookup does not filter on ``deprecated``, so it may
    # return a previously deprecated job with the same title -- confirm
    # whether ``deprecated=False`` should be added here.
    job = Job.objects(id__in=[ref.id for ref in employer.jobs],
                      title=job_title).first()

    if year < job.year:
        raise DataIntegrityError(
            'Job: {} by {} cannot be advertised before {}'.format(
                job_title, employer_name, job.year))

    # Job summary is not the same. In this case the employer most likely changed the job
    if comparable(filtered_summary) != comparable(job.summary):
        if openings < 1:
            logger.info(
                COMPONENT,
                'Job: {}: different summary detected but invalid openings: {}, ignoring..'
                .format(job_title, openings))
            return

        logger.info(
            COMPONENT,
            'Job: {}: different summary detected, deprecating and creating new job..'
            .format(job_title))

        job.update(set__deprecated=True)

        new_job = create_job()

        employer.update(push__jobs=new_job)

        if index:
            # Replace both the employer and the old job entries in the index
            elastic.delete_employer_waterlooworks(employer)
            elastic.delete_job_waterlooworks(employer, job)
            elastic.index_employer_waterlooworks(employer)
            elastic.index_job_waterlooworks(employer, new_job)

        return

    # Job is the same (same title and description)

    # If job is being advertised in new term
    if year != job.year or term != job.term:
        logger.info(
            COMPONENT,
            'Job: {}: being advertised in new term, updating..'.format(
                job_title))

        # Add hire ratio for previous term. A previous term without any
        # openings has no defined ratio, so it is skipped instead of
        # dividing by zero.
        if job.openings:
            hire_ratio = float(job.openings - job.remaining) / job.openings

            job.hire_rate.add_rating(hire_ratio)

        hire_rate = AggregateRating(rating=job.hire_rate.rating,
                                    count=job.hire_rate.count)

        job.update(set__year=year,
                   set__term=term,
                   add_to_set__location=Location(name=location_name),
                   set__openings=openings,
                   set__remaining=remaining,
                   push__applicants=Applicant(applicants=applicants,
                                              date=date),
                   set__hire_rate=hire_rate,
                   set__levels=levels,
                   set__programs=programs,
                   set__url=url,
                   set__last_indexed=datetime.now())

        if index:
            elastic.update_job_waterlooworks(employer, job)

        return

    # Job is being updated. We need to update location, levels, remaining, applicants
    logger.info(COMPONENT,
                'Job: {}: updating for current term'.format(job_title))

    # Remaining never grows within a term: if the posting now shows fewer
    # openings than were remaining, some positions have been filled.
    remaining = min(job.remaining, openings)

    job.update(add_to_set__location=Location(name=location_name),
               set__remaining=remaining,
               set__levels=list(set(levels + job.levels)),
               push__applicants=Applicant(applicants=applicants, date=date),
               set__programs=list(set(programs + job.programs)),
               set__url=url,
               set__last_indexed=datetime.now())

    if index:
        elastic.update_job_waterlooworks(employer, job)
Ejemplo n.º 11
0
    def get(self):
        """Return one page of jobs selected by the request's query string.

        Query arguments read: ``job_class``, ``page``, ``limit``, ``option``
        and, when ``option`` is not ``'all'``, ``experience`` and
        ``category``. A filter value that is empty or ``'0'`` is not applied.

        With ``option == 'all'`` every job (optionally restricted by class)
        is listed in a short form; otherwise only jobs with ``state == 100``
        are listed, in a detailed form.
        """
        params = request.args
        job_class = params.get("job_class")
        page = int(params.get("page"))
        limit = int(params.get("limit"))
        option = params.get("option")

        def is_set(raw):
            # A filter counts as set when it is non-empty and not '0'.
            return bool(raw) and raw != '0'

        def brief(job):
            # Short listing entry, timestamp down to the second.
            stamp = job.created_at.strftime("%Y-%m-%d %H:%M:%S")
            return {
                "name": job.name,
                'department': job.department,
                'time': stamp,
                'classes': JOB_CLASS.get(job.classes),
                'location': job.location,
                'id': job.id
            }

        def detailed(job):
            # Full listing entry, timestamp as a date only.
            stamp = job.created_at.strftime("%Y-%m-%d")
            return {
                "name": job.name,
                'time': stamp,
                'department': job.department,
                'classes': JOB_CLASS.get(job.classes),
                'location': job.location,
                'salary': "{0}-{1}k".format(job.salary_start, job.salary_end),
                'experience': EXPERIENCE.get(job.experience),
                'education': LEVEL.get(job.education),
                'temptation': job.temptation,
                'id': job.id
            }

        if option == 'all':
            if is_set(job_class):
                rn = Job.objects(classes=int(job_class)).all()
            else:
                rn = Job.objects().all()
            render = brief
        else:
            candidates = (
                ('classes', job_class),
                ('experience', params.get("experience")),
                ('category', params.get("category")),
            )
            filters = {'state': 100}
            for field, raw in candidates:
                if is_set(raw):
                    filters[field] = int(raw)
            rn = Job.objects(**filters).all()
            render = detailed

        obj = Job.pagination(page=page, limit=limit, rn=rn)
        job_list = [render(job) for job in obj.get("data")]

        # The page object is updated in place, but only the list is returned.
        obj['data'] = job_list

        return success(res={'data': job_list})
Ejemplo n.º 12
0
def display_job():
    """Render the detail page for one non-deprecated job of an employer.

    The employer name and job title are read from the ``employer`` and
    ``title`` query-string arguments. ``404.html`` is rendered when either
    the employer or a matching non-deprecated job cannot be found.
    """
    employer_name = flask.request.args.get('employer') or ''
    job_title = flask.request.args.get('title') or ''

    employer = Employer.objects(name=employer_name).no_dereference().first()

    # Check the employer before touching employer.jobs; an unknown employer
    # must produce the 404 page, not an AttributeError on None.
    if not employer:
        return render_template('404.html')

    job = Job.objects(id__in=[ref.id for ref in employer.jobs],
                      title=job_title, deprecated=False).first()

    if not job:
        return render_template('404.html')

    # Replace '\r\n' before '\n'; in the opposite order the '\r\n' pattern can
    # never match and stray carriage returns are left in the output.
    summary = job.summary.strip('-').strip('_').strip('-').strip('_').strip()\
        .replace('\r\n', '<br>').replace('\n', '<br>')

    keywords = []

    for keyword in job.keywords:
        # Fallback colour for keywords without a configured colour
        color = '#949FB1'

        if keyword.keyword in colors.colors and colors.colors[keyword.keyword]['color']:
            color = colors.colors[keyword.keyword]['color']

        keywords.append({
            'keyword': keyword.keyword,
            'color': color
        })

    # Report the applicant count from the most recent record dated before
    # now; stays 0 when the job has no such record.
    job_applicants = 0

    applicants_by_date = {
        applicant.date: applicant.applicants for applicant in job.applicants
    }

    now = datetime.now()

    past_dates = [date for date in applicants_by_date if date < now]

    if past_dates:
        job_applicants = applicants_by_date[max(past_dates)]

    comments = [{
        'comment': comment.comment,
        'date': comment.date.isoformat(),
        'salary': comment.salary,
        'rating': comment.rating.rating * 5,
        'crawled': comment.crawled
    } for comment in job.comments]

    job_data = {
        'employer_name': string.capwords(employer.name),
        'job_id': job.id,
        'job_title': string.capwords(job.title),
        'job_term': job.term,
        'job_year': job.year,
        'job_summary': summary,
        'job_locations': [string.capwords(location.name) for location in job.location],
        'job_openings': job.openings,
        'job_remaining': job.remaining,
        'job_hire_rate': int(job.hire_rate.rating * 100),
        'job_programs': job.programs,
        'job_levels': job.levels,
        'job_keywords': keywords,
        'job_applicants': job_applicants
    }

    return render_template('job.html', job_data=job_data, comments=comments, page_script='job')