예제 #1
0
        def _decorate(*args, **kwargs):
            arguments = json.dumps(dict(args=args, kwargs=kwargs))  # 简单参数类型,不用过于复杂

            # 任务放入mongodb
            job = Job()
            job.module_name = module_name
            job.function_name = function_name
            job.arguments = arguments
            job.queue_type = queue_key
            job.save()
예제 #2
0
def create_job():
    try:
        job_data = request.json
        job_load = JobSchema().load(job_data)
        job = Job(**job_load)
        job.save()
        job_dump = JobSchema().dump(job)
        return make_response(jsonify(job_dump), 201)
    except ValidationError as err:
        return make_response(jsonify(err.messages), 500)
    except SQLAlchemyError as err:
        logging.error('Error in Job creation: {}'.format(list(err.args)))
        return make_response(
            jsonify({
                'Error Message':
                'There was an error, we coudn\'t create the Job'
            }), 500)
예제 #3
0
    def run(self):
        Employer.objects.delete()
        employers = self.get_employers()
        for e in employers:
            print "Creating employer %s" % e
            employer = Employer(**e)
            employer.save()

        Student.objects.delete()
        students = self.get_students()

        experiences = self.get_experiences()
        for i in range(len(experiences)):
            experience_list = experiences[i]
            s = students[i]
            for e in experience_list:
                s.experience = [Experience(**e)]
                s.save()

        educations = self.get_educations()
        for s in students:
            education = choice(educations)
            s.education = [Education(**education)]
            s.save()

        employers = Employer.find({})
        jobs = self.get_jobs()
        for i in range(len(jobs)):
            j = jobs[i]
            e = employers[i]
            j['employer_id'] = e['id']
            job = Job(**j)
            job.save()

        jobs = Job.find({})

        self.save_applications(jobs, students)
예제 #4
0
파일: importer.py 프로젝트: Mo-Talha/Nomad
def import_job(**kwargs):
    """Import job.

    Keyword arguments:
    employer_name -- Employer name
    job_title -- Title of job
    summary -- Job summary
    year -- Year the job was advertised
    term -- Term job was advertised [Fall, Winter, Spring]
    location -- Location job was advertised
    openings -- Number of job openings
    remaining -- Number of job openings remaining
    applicants -- Number of applicants job has (Optional)
    levels -- Levels job is intended for [Junior, Intermediate, Senior]
    programs -- Programs the job is specified for
    url -- URL of job
    date -- Date job was crawled (useful for knowing exactly # of applicants at what time)
    index -- Boolean to indicate whether to index or not (default True)
    """

    employer_name = kwargs['employer_name'].lower()

    job_title = kwargs['job_title'].lower()

    term = kwargs['term']

    levels = []

    for level in kwargs['levels']:
        uw_level = Term.get_level(level)
        if uw_level:
            levels.append(uw_level)
        else:
            logger.error(COMPONENT, 'Error processing level: {}'.format(level))

    programs = []

    for program in kwargs['programs']:
        uw_program = Program.get_program(program)
        if uw_program:
            programs.append(uw_program)
        else:
            logger.error(COMPONENT, 'Error processing program: {}'.format(program))

    location = kwargs['location'].lower()

    openings = int(kwargs['openings'])

    remaining = int(kwargs['remaining']) if 'remaining' in kwargs else openings

    summary = kwargs['summary']

    filtered_summary = engine.filter_summary(summary)

    summary_keywords = engine.get_keywords(filtered_summary, programs)

    date = kwargs['date']

    year = date.year

    url = kwargs['url']

    applicants = 0

    try:
        if kwargs['applicants']:
            applicants = int(kwargs['applicants'])
    except Exception:
        pass

    index = False

    if index in kwargs:
        index = kwargs['index']

    logger.info(COMPONENT, 'Importing job: {} from {}'.format(job_title, employer_name))

    # If employer does not exist, create it
    if not Employer.employer_exists(employer_name):
        logger.info(COMPONENT, 'Employer: {} does not exist, creating..'.format(employer_name))

        employer = Employer(name=employer_name)

        logger.info(COMPONENT, 'Creating job: {}'.format(job_title))

        location = Location(name=location)

        applicant = Applicant(applicants=applicants, date=date)

        keywords = [Keyword(keyword=k['keyword'], types=k['types']) for k in summary_keywords]

        # New job so number of remaining positions is same as openings
        job = Job(title=job_title, summary=filtered_summary, year=year,
                  term=term, location=[location], openings=openings, remaining=remaining,
                  applicants=[applicant], levels=levels, programs=programs, url=url,
                  keywords=keywords)

        job.save()
        job.reload()

        employer.jobs.append(job)
        employer.save()
        employer.reload()

        if index:
            elastic.index_employer_waterlooworks(employer)
            elastic.index_job_waterlooworks(employer, job)

    # Employer already exists
    else:
        employer = Employer.objects(name=employer_name).no_dereference().first()

        logger.info(COMPONENT, 'Employer: {} already exists'.format(employer_name))

        # If job does not exist, create it
        if not employer.job_exists(job_title):
            logger.info(COMPONENT, 'Creating job: {}'.format(job_title))

            location = Location(name=location)

            applicant = Applicant(applicants=applicants, date=date)

            keywords = [Keyword(keyword=k['keyword'], types=k['types']) for k in summary_keywords]

            # New job so number of remaining positions is same as openings
            job = Job(title=job_title, summary=engine.filter_summary(summary), year=year,
                      term=term, location=[location], openings=openings, remaining=remaining,
                      applicants=[applicant], levels=levels, programs=programs, url=url,
                      keywords=keywords)

            job.save()
            job.reload()

            employer.update(push__jobs=job)

            if index:
                elastic.update_employer_waterlooworks(employer)
                elastic.index_job_waterlooworks(employer, job)

        # Job already exists
        else:
            logger.info(COMPONENT, 'Job: {} already exists'.format(job_title))

            job = Job.objects(id__in=[job.id for job in employer.jobs], title=job_title).first()

            if not year >= job.year:
                raise DataIntegrityError('Job: {} by {} cannot be advertised before {}'
                                         .format(job_title, employer_name, job.year))

            filtered_summary_compare = re.sub(r'\W+', '', filtered_summary.lower().strip()).strip()
            job_summary_compare = re.sub(r'\W+', '', job.summary.lower().strip()).strip()

            # Job summary is not the same. In this case the employer most likely changed the job
            if not filtered_summary_compare == job_summary_compare:

                if openings >= 1:
                    logger.info(COMPONENT, 'Job: {}: different summary detected, deprecating and creating new job..'
                                .format(job_title))

                    job.update(set__deprecated=True)

                    location = Location(name=location)

                    applicant = Applicant(applicants=applicants, date=date)

                    keywords = [Keyword(keyword=k['keyword'], types=k['types']) for k in summary_keywords]

                    # Assume new job so number of remaining positions is same as openings
                    new_job = Job(title=job_title, summary=filtered_summary, year=year, term=term,
                                  location=[location], openings=openings, remaining=remaining, applicants=[applicant],
                                  levels=levels, programs=programs, url=url, keywords=keywords)

                    new_job.save()
                    new_job.reload()

                    employer.update(push__jobs=new_job)

                    if index:
                        elastic.delete_employer_waterlooworks(employer)
                        elastic.delete_job_waterlooworks(employer, job)
                        elastic.index_employer_waterlooworks(employer)
                        elastic.index_job_waterlooworks(employer, new_job)
                else:
                    logger.info(COMPONENT, 'Job: {}: different summary detected but invalid openings: {}, ignoring..'
                                .format(job_title, openings))

            # Job is the same (same title and description)
            else:
                # If job is being advertised in new term
                if year != job.year or term != job.term:
                    logger.info(COMPONENT, 'Job: {}: being advertised in new term, updating..'.format(job_title))

                    # Add hire ratio for previous term
                    hire_ratio = float(job.openings - job.remaining) / job.openings
                    
                    job.hire_rate.add_rating(hire_ratio)

                    location = Location(name=location)

                    applicant = Applicant(applicants=applicants, date=date)

                    hire_rate = AggregateRating(rating=job.hire_rate.rating, count=job.hire_rate.count)
                    
                    job.update(set__year=year, set__term=term, add_to_set__location=location, set__openings=openings,
                               set__remaining=remaining, push__applicants=applicant, set__hire_rate=hire_rate,
                               set__levels=levels, set__programs=programs, set__url=url, set__last_indexed=datetime.now())

                    if index:
                        elastic.update_job_waterlooworks(employer, job)

                # Job is being updated. We need to update location, openings, levels, remaining, hire_rate, applicants
                else:
                    logger.info(COMPONENT, 'Job: {}: updating for current term'.format(job_title))

                    remaining = job.remaining

                    # Job posting has decreased, some positions filled up
                    if openings < remaining:
                        remaining = openings

                    location = Location(name=location)

                    applicant = Applicant(applicants=applicants, date=date)

                    job.update(add_to_set__location=location, set__remaining=remaining,
                               set__levels=list(set(levels + job.levels)), push__applicants=applicant,
                               set__programs=list(set(programs + job.programs)), set__url=url,
                               set__last_indexed=datetime.now())

                    if index:
                        elastic.update_job_waterlooworks(employer, job)
예제 #5
0
파일: importer.py 프로젝트: Mo-Talha/Nomad
def update_job(**kwargs):
    """Update job.

    Keyword arguments:
    id -- Job ID
    summary -- Job summary
    location -- Location job was advertised
    programs -- Programs the job is specified for
    levels -- Levels job is intended for [Junior, Intermediate, Senior]
    openings -- Number of job openings
    index -- Boolean to indicate whether to index or not (default True)
    """

    summary = kwargs['summary']

    location = kwargs['location'].lower()

    levels = kwargs['levels']

    programs = []

    for program in kwargs['programs']:
        uw_program = Program.get_program(program)
        if uw_program:
            programs.append(uw_program)
        else:
            logger.error(COMPONENT, 'Error processing program: {}'.format(program))

    openings = 0

    try:
        if kwargs['openings']:
            openings = int(kwargs['openings']) or 0
    except Exception:
        pass

    index = False

    if index in kwargs:
        index = kwargs['index']

    job = Job.objects(id=kwargs['id']).first()

    remaining = job.openings

    # Job posting has decreased, some positions filled up
    if openings < job.openings:
        remaining = openings

    filtered_summary = engine.filter_summary(summary)

    summary_keywords = engine.get_keywords(filtered_summary, programs)

    filtered_summary_compare = re.sub(r'\W+', '', filtered_summary.lower().strip()).strip()
    job_summary_compare = re.sub(r'\W+', '', job.summary.lower().strip()).strip()

    employer = Employer.objects(jobs=kwargs['id']).first()

    # Job summary is not the same. In this case the employer most likely changed the job
    if not filtered_summary_compare == job_summary_compare:

        if openings >= 1:
            logger.info(COMPONENT, 'Job: {}: different summary detected, deprecating and creating new job..'
                        .format(kwargs['id']))

            job.update(set__deprecated=True)

            location = Location(name=location)

            keywords = [Keyword(keyword=k['keyword'], types=k['types']) for k in summary_keywords]

            # Assume new job so number of remaining positions is same as openings
            new_job = Job(title=job.title, summary=filtered_summary, year=job.year, term=job.term,
                          location=[location], openings=openings, remaining=openings,
                          levels=levels, programs=programs, url=job.url, keywords=keywords)

            new_job.save()

            employer.update(push__jobs=new_job)

            if index:
                elastic.delete_employer_waterlooworks(employer)
                elastic.delete_job_waterlooworks(employer, job)
                elastic.index_employer_waterlooworks(employer)
                elastic.index_job_waterlooworks(employer, new_job)
        else:
            logger.info(COMPONENT, 'Job: {}: different summary detected but invalid openings: {}, ignoring..'
                        .format(job.title, openings))
    else:
        logger.info(COMPONENT, 'Job: {}: updating for current term'.format(kwargs['id']))

        location = Location(name=location)

        job.update(add_to_set__location=location, set__remaining=remaining,
                   set__levels=list(set(levels + job.levels)),
                   set__programs=list(set(programs + job.programs)), set__last_indexed=datetime.now())

        if index:
            elastic.update_job_waterlooworks(employer, job)
예제 #6
0
def update_job(**kwargs):
    """Update job.

    Keyword arguments:
    id -- Job ID
    summary -- Job summary
    location -- Location job was advertised
    programs -- Programs the job is specified for
    levels -- Levels job is intended for [Junior, Intermediate, Senior]
    openings -- Number of job openings
    index -- Boolean to indicate whether to index or not (default True)
    """

    summary = kwargs['summary']

    location = kwargs['location'].lower()

    levels = kwargs['levels']

    programs = []

    for program in kwargs['programs']:
        uw_program = Program.get_program(program)
        if uw_program:
            programs.append(uw_program)
        else:
            logger.error(COMPONENT,
                         'Error processing program: {}'.format(program))

    openings = 0

    try:
        if kwargs['openings']:
            openings = int(kwargs['openings']) or 0
    except Exception:
        pass

    index = False

    if index in kwargs:
        index = kwargs['index']

    job = Job.objects(id=kwargs['id']).first()

    remaining = job.openings

    # Job posting has decreased, some positions filled up
    if openings < job.openings:
        remaining = openings

    filtered_summary = engine.filter_summary(summary)

    summary_keywords = engine.get_keywords(filtered_summary, programs)

    filtered_summary_compare = re.sub(
        r'\W+', '',
        filtered_summary.lower().strip()).strip()
    job_summary_compare = re.sub(r'\W+', '',
                                 job.summary.lower().strip()).strip()

    employer = Employer.objects(jobs=kwargs['id']).first()

    # Job summary is not the same. In this case the employer most likely changed the job
    if not filtered_summary_compare == job_summary_compare:

        if openings >= 1:
            logger.info(
                COMPONENT,
                'Job: {}: different summary detected, deprecating and creating new job..'
                .format(kwargs['id']))

            job.update(set__deprecated=True)

            location = Location(name=location)

            keywords = [
                Keyword(keyword=k['keyword'], types=k['types'])
                for k in summary_keywords
            ]

            # Assume new job so number of remaining positions is same as openings
            new_job = Job(title=job.title,
                          summary=filtered_summary,
                          year=job.year,
                          term=job.term,
                          location=[location],
                          openings=openings,
                          remaining=openings,
                          levels=levels,
                          programs=programs,
                          url=job.url,
                          keywords=keywords)

            new_job.save()

            employer.update(push__jobs=new_job)

            if index:
                elastic.delete_employer_waterlooworks(employer)
                elastic.delete_job_waterlooworks(employer, job)
                elastic.index_employer_waterlooworks(employer)
                elastic.index_job_waterlooworks(employer, new_job)
        else:
            logger.info(
                COMPONENT,
                'Job: {}: different summary detected but invalid openings: {}, ignoring..'
                .format(job.title, openings))
    else:
        logger.info(COMPONENT,
                    'Job: {}: updating for current term'.format(kwargs['id']))

        location = Location(name=location)

        job.update(add_to_set__location=location,
                   set__remaining=remaining,
                   set__levels=list(set(levels + job.levels)),
                   set__programs=list(set(programs + job.programs)),
                   set__last_indexed=datetime.now())

        if index:
            elastic.update_job_waterlooworks(employer, job)
예제 #7
0
def import_job(**kwargs):
    """Import job.

    Keyword arguments:
    employer_name -- Employer name
    job_title -- Title of job
    summary -- Job summary
    year -- Year the job was advertised
    term -- Term job was advertised [Fall, Winter, Spring]
    location -- Location job was advertised
    openings -- Number of job openings
    remaining -- Number of job openings remaining
    applicants -- Number of applicants job has (Optional)
    levels -- Levels job is intended for [Junior, Intermediate, Senior]
    programs -- Programs the job is specified for
    url -- URL of job
    date -- Date job was crawled (useful for knowing exactly # of applicants at what time)
    index -- Boolean to indicate whether to index or not (default True)
    """

    employer_name = kwargs['employer_name'].lower()

    job_title = kwargs['job_title'].lower()

    term = kwargs['term']

    levels = []

    for level in kwargs['levels']:
        uw_level = Term.get_level(level)
        if uw_level:
            levels.append(uw_level)
        else:
            logger.error(COMPONENT, 'Error processing level: {}'.format(level))

    programs = []

    for program in kwargs['programs']:
        uw_program = Program.get_program(program)
        if uw_program:
            programs.append(uw_program)
        else:
            logger.error(COMPONENT,
                         'Error processing program: {}'.format(program))

    location = kwargs['location'].lower()

    openings = int(kwargs['openings'])

    remaining = int(kwargs['remaining']) if 'remaining' in kwargs else openings

    summary = kwargs['summary']

    filtered_summary = engine.filter_summary(summary)

    summary_keywords = engine.get_keywords(filtered_summary, programs)

    date = kwargs['date']

    year = date.year

    url = kwargs['url']

    applicants = 0

    try:
        if kwargs['applicants']:
            applicants = int(kwargs['applicants'])
    except Exception:
        pass

    index = False

    if index in kwargs:
        index = kwargs['index']

    logger.info(COMPONENT,
                'Importing job: {} from {}'.format(job_title, employer_name))

    # If employer does not exist, create it
    if not Employer.employer_exists(employer_name):
        logger.info(
            COMPONENT,
            'Employer: {} does not exist, creating..'.format(employer_name))

        employer = Employer(name=employer_name)

        logger.info(COMPONENT, 'Creating job: {}'.format(job_title))

        location = Location(name=location)

        applicant = Applicant(applicants=applicants, date=date)

        keywords = [
            Keyword(keyword=k['keyword'], types=k['types'])
            for k in summary_keywords
        ]

        # New job so number of remaining positions is same as openings
        job = Job(title=job_title,
                  summary=filtered_summary,
                  year=year,
                  term=term,
                  location=[location],
                  openings=openings,
                  remaining=remaining,
                  applicants=[applicant],
                  levels=levels,
                  programs=programs,
                  url=url,
                  keywords=keywords)

        job.save()
        job.reload()

        employer.jobs.append(job)
        employer.save()
        employer.reload()

        if index:
            elastic.index_employer_waterlooworks(employer)
            elastic.index_job_waterlooworks(employer, job)

    # Employer already exists
    else:
        employer = Employer.objects(
            name=employer_name).no_dereference().first()

        logger.info(COMPONENT,
                    'Employer: {} already exists'.format(employer_name))

        # If job does not exist, create it
        if not employer.job_exists(job_title):
            logger.info(COMPONENT, 'Creating job: {}'.format(job_title))

            location = Location(name=location)

            applicant = Applicant(applicants=applicants, date=date)

            keywords = [
                Keyword(keyword=k['keyword'], types=k['types'])
                for k in summary_keywords
            ]

            # New job so number of remaining positions is same as openings
            job = Job(title=job_title,
                      summary=engine.filter_summary(summary),
                      year=year,
                      term=term,
                      location=[location],
                      openings=openings,
                      remaining=remaining,
                      applicants=[applicant],
                      levels=levels,
                      programs=programs,
                      url=url,
                      keywords=keywords)

            job.save()
            job.reload()

            employer.update(push__jobs=job)

            if index:
                elastic.update_employer_waterlooworks(employer)
                elastic.index_job_waterlooworks(employer, job)

        # Job already exists
        else:
            logger.info(COMPONENT, 'Job: {} already exists'.format(job_title))

            job = Job.objects(id__in=[job.id for job in employer.jobs],
                              title=job_title).first()

            if not year >= job.year:
                raise DataIntegrityError(
                    'Job: {} by {} cannot be advertised before {}'.format(
                        job_title, employer_name, job.year))

            filtered_summary_compare = re.sub(
                r'\W+', '',
                filtered_summary.lower().strip()).strip()
            job_summary_compare = re.sub(r'\W+', '',
                                         job.summary.lower().strip()).strip()

            # Job summary is not the same. In this case the employer most likely changed the job
            if not filtered_summary_compare == job_summary_compare:

                if openings >= 1:
                    logger.info(
                        COMPONENT,
                        'Job: {}: different summary detected, deprecating and creating new job..'
                        .format(job_title))

                    job.update(set__deprecated=True)

                    location = Location(name=location)

                    applicant = Applicant(applicants=applicants, date=date)

                    keywords = [
                        Keyword(keyword=k['keyword'], types=k['types'])
                        for k in summary_keywords
                    ]

                    # Assume new job so number of remaining positions is same as openings
                    new_job = Job(title=job_title,
                                  summary=filtered_summary,
                                  year=year,
                                  term=term,
                                  location=[location],
                                  openings=openings,
                                  remaining=remaining,
                                  applicants=[applicant],
                                  levels=levels,
                                  programs=programs,
                                  url=url,
                                  keywords=keywords)

                    new_job.save()
                    new_job.reload()

                    employer.update(push__jobs=new_job)

                    if index:
                        elastic.delete_employer_waterlooworks(employer)
                        elastic.delete_job_waterlooworks(employer, job)
                        elastic.index_employer_waterlooworks(employer)
                        elastic.index_job_waterlooworks(employer, new_job)
                else:
                    logger.info(
                        COMPONENT,
                        'Job: {}: different summary detected but invalid openings: {}, ignoring..'
                        .format(job_title, openings))

            # Job is the same (same title and description)
            else:
                # If job is being advertised in new term
                if year != job.year or term != job.term:
                    logger.info(
                        COMPONENT,
                        'Job: {}: being advertised in new term, updating..'.
                        format(job_title))

                    # Add hire ratio for previous term
                    hire_ratio = float(job.openings -
                                       job.remaining) / job.openings

                    job.hire_rate.add_rating(hire_ratio)

                    location = Location(name=location)

                    applicant = Applicant(applicants=applicants, date=date)

                    hire_rate = AggregateRating(rating=job.hire_rate.rating,
                                                count=job.hire_rate.count)

                    job.update(set__year=year,
                               set__term=term,
                               add_to_set__location=location,
                               set__openings=openings,
                               set__remaining=remaining,
                               push__applicants=applicant,
                               set__hire_rate=hire_rate,
                               set__levels=levels,
                               set__programs=programs,
                               set__url=url,
                               set__last_indexed=datetime.now())

                    if index:
                        elastic.update_job_waterlooworks(employer, job)

                # Job is being updated. We need to update location, openings, levels, remaining, hire_rate, applicants
                else:
                    logger.info(
                        COMPONENT,
                        'Job: {}: updating for current term'.format(job_title))

                    remaining = job.remaining

                    # Job posting has decreased, some positions filled up
                    if openings < remaining:
                        remaining = openings

                    location = Location(name=location)

                    applicant = Applicant(applicants=applicants, date=date)

                    job.update(add_to_set__location=location,
                               set__remaining=remaining,
                               set__levels=list(set(levels + job.levels)),
                               push__applicants=applicant,
                               set__programs=list(set(programs +
                                                      job.programs)),
                               set__url=url,
                               set__last_indexed=datetime.now())

                    if index:
                        elastic.update_job_waterlooworks(employer, job)