Esempio n. 1
0
 def get_results(i):
     '''get results and check against the db if they are new. add to queue if new'''
     filters['start'] = i
     temp_list = []
     # get 25 results, using provided filters with start index
     [
         temp_list.append(x) for x in json.loads(
             CommonFuncs.convertBytesToString(api.search(
                 **filters)))['results']
     ]
     [
         new_jobs_queue.put(x) for x in temp_list
         if new_jobs_queue.unfinished_tasks < desired_result_count
     ]
Esempio n. 2
0
    def get_api_results(self, desired_result_count=1):
        '''return job json objects from the indeed api.'''

        job_profile = CommonFuncs.get_job_profile()

        # GET LOCATION IN JOB PROFILE
        locations = CommonFuncs.get_locations_list(job_profile)

        # KEYWORDS CONNECTED BY OR
        query_list = CommonFuncs.build_query_string(job_profile=job_profile,
                                                    or_delim='or',
                                                    bracket1='(',
                                                    bracket2=')',
                                                    adv_supp=True)
        query_string = query_list[0]

        new_jobs_queue = queue.Queue(maxsize=0)
        new_jobs = None

        limit = '25'  # 25 is the max results per request
        lookback_period = '60'  # default lookback period
        client_id = {}
        api = None

        # CONNECT TO INDEED API FOR JOB QUERIES
        try:
            client_id = json.load(open(API_KEYS_PATH, 'r'))
            api = IndeedClient(publisher=client_id['publisher_id'])
        except:
            ValueError('No publisher id found. Filtering aborted.')

        filters = {
            'q': query_string,
            'l': '',
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            "raw": "False",
            "sort": "date",
            "radius": job_profile.radius,
            "limit": limit,
            "fromage": lookback_period,
        }

        # FIND NEW JOB JSON OBJECT USING INDEED API
        # GET NEW JOBS

        for location in locations:  # iterate over each location
            filters['l'] = location
            filters['q'] = query_string

            # THREAD-BRAINED APPROACH to get all results at once
            def get_results(i):
                '''get results and check against the db if they are new. add to queue if new'''
                filters['start'] = i
                temp_list = []
                # get 25 results, using provided filters with start index
                [
                    temp_list.append(x) for x in json.loads(
                        CommonFuncs.convertBytesToString(api.search(
                            **filters)))['results']
                ]
                [
                    new_jobs_queue.put(x) for x in temp_list
                    if new_jobs_queue.unfinished_tasks < desired_result_count
                ]

            result_count = int(
                json.loads(
                    CommonFuncs.convertBytesToString(
                        api.search(**filters)))['totalResults'])

            list_of_filter_starts = [
                str(i) for i in range(0, result_count, 25)
            ]  # build list of start positions

            for item in list_of_filter_starts:
                if not new_jobs_queue.unfinished_tasks < desired_result_count:
                    break
                get_results(item)

            new_jobs = list(
                new_jobs_queue.queue)  # append query results to list

        # RETURN JOBS
        if new_jobs:
            if desired_result_count == 1:  # just return a single job, not in a list
                return new_jobs[0]
            elif desired_result_count <= len(
                    new_jobs
            ):  # if we have more than enough new jobs, return those in a list
                return new_jobs[0:desired_result_count]
            else:  # if more than the available number of new jobs requested, return all that could be found
                return new_jobs
        else:
            return []  # if no new links found