Ejemplo n.º 1
0
def get_data():
    """Collect current Indeed postings for a fixed grid of cities and job titles.

    One Indeed search (at most 25 results) is issued per (city, job) pair and
    every non-expired result is flattened into a small dict.

    Expired postings are skipped entirely; previously an empty dict was
    appended for each of them.  The user agent is sent as a single-line
    string instead of a triple-quoted literal with an embedded newline and
    indentation.

    Returns:
        list: the literal string ``'jobs'`` as the first element (kept so
        existing callers see the same leading entry), followed by one dict
        per non-expired result with the keys ``city``, ``date_posted``,
        ``company``, ``title``, ``url``, ``job_id``, ``state`` and
        ``snippet``.
    """
    client = IndeedClient('7381316591612982')
    user_agent = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/56.0.2924.87 Safari/537.36")

    cities = [
        'New York, NY', 'Austin, TX', 'San Francisco, CA', 'Boston, MA',
        'Chicago, IL', 'Miami, FL'
    ]
    jobs = [
        'Front End Engineer', 'Back End Engineer', 'Data Science',
        'Product Management', 'Director of Engineering', 'Data Engineer',
        'Data Analyst', 'Accounting', 'Marketing', 'Finance', 'Nurse',
        'Doctor', 'Lawyer', 'Paralegal', 'sales', 'customer_service',
        'human resources', 'executive assistant', 'operations', 'teacher',
        'maintenance', 'security guards'
    ]

    res_list = ['jobs']
    for c in cities:
        for j in jobs:
            search_response = client.search(
                q=j,
                l=c,
                userip="172.68.141.95",
                useragent=user_agent,
                limit=25)
            for res in search_response['results']:
                if res['expired']:
                    # Nothing useful to record for an expired posting.
                    continue
                res_list.append({
                    'city': res['city'],
                    'date_posted': res['date'],
                    'company': res['company'],
                    'title': res['jobtitle'],
                    'url': res['url'],
                    'job_id': res['jobkey'],
                    'state': res['state'],
                    'snippet': res['snippet'],
                })

    return res_list
Ejemplo n.º 2
0
    def scrape_indeed(self, api_key, ip_address, places=None):
        """Search Indeed for every (place, filter term) pair and return new matches.

        Args:
            api_key: Indeed publisher key handed to ``IndeedClient``.
            ip_address: value sent as the ``userip`` search parameter.
            places: iterable of location strings; falls back to
                ``['san francisco, ca']`` when empty or ``None``.

        Returns:
            list: ``[title, formattedLocationFull, url, snippet]`` for every
            job whose key was not seen before and whose title passes
            ``self.filter_title`` for the term it was found under.

        The set of seen job keys is loaded via ``self.load_titles`` and
        written back via ``self.save_titles`` under the name
        ``'indeed_jobs'``.
        """
        client = IndeedClient(api_key)
        matches = []
        known_ids = self.load_titles('indeed_jobs')

        if not places:
            places = ['san francisco, ca']

        for place in places:
            for term in list(self.filters.keys()):
                sys.stderr.write(
                    'Searching {} Indeed for {}... '.format(place, term))
                response = client.search(
                    q=term,
                    l=place,
                    userip=ip_address,
                    useragent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
                    limit=25)
                postings = response['results']
                sys.stdout.write(
                    'returned {} items\n'.format(len(postings)))

                for posting in postings:
                    key = posting['jobkey']
                    if key in known_ids:
                        continue
                    known_ids.add(key)
                    title = posting['jobtitle']
                    if not self.filter_title(title, self.filters[term]):
                        continue
                    matches.append([
                        title,
                        posting['formattedLocationFull'],
                        posting['url'],
                        posting['snippet'],
                    ])

        self.save_titles('indeed_jobs', known_ids)
        return matches
Ejemplo n.º 3
0
def generate_job_list(params,publisher_id):
    """
    Returns list of jobs that match search criteria

    The Indeed search is paged 25 results at a time until ``params['start']``
    reaches the ``totalresults`` value reported by the response.  The response
    is parsed as XML, so ``params`` is expected to request XML output
    (NOTE(review): confirm against the caller).

    params       -- dict of Indeed search parameters.  Must contain 'start'
                    (the caller initialises it, normally to 0); it is
                    advanced in place by 25 per request.
    publisher_id -- Indeed publisher id used to build the client.

    Returns a list of unique tuples
    (jobtitle, company, city, date, sponsored, url) in first-seen order,
    preceded by a header tuple carrying those column names.  ``date`` is
    characters 5-15 of the reported date with spaces replaced by dashes.
    """
    # One client is enough for all pages; it used to be rebuilt per request.
    client = IndeedClient(publisher = publisher_id)
    job_list = []
    seen = set()  # O(1) duplicate check instead of scanning job_list
    #since we initiated params['start'] at 0
    total_results = 1
    while int(params['start']) < total_results:
        search_response = client.search(**params)
        root = ET.fromstring(search_response)
        params['start']  = str(int(params['start'])+25)
        total_results = int(root.find('totalresults').text)
        for result in root.iter('result'):
            jobtitle = result.find('jobtitle').text
            company = result.find('company').text
            city = result.find('city').text
            date = result.find('date').text
            sponsored = result.find('sponsored').text
            url = result.find('url').text
            job = (unicode(jobtitle),unicode(company),unicode(city),unicode(date)[5:16].replace(" ","-"),unicode(sponsored), unicode(url))
            if job not in seen:
                seen.add(job)
                job_list.append(job)

    job_list.insert(0,(unicode("jobtitle"),unicode("company"),unicode("city"),unicode("date"),unicode("sponsored"), unicode("url"))) #add header
    return job_list
def get_indeed_job_list(query, location, radius):
    """Fetch up to four pages of Indeed results as one pandas DataFrame.

    Args:
        query: value for the ``q`` search parameter.
        location: value for the ``l`` search parameter.
        radius: value for the ``radius`` search parameter.

    Returns:
        pandas.DataFrame: all results from offsets 0, 25, 50 and 75 with a
        fresh index, ``date`` converted with ``pd.to_datetime`` and the
        bookkeeping columns listed in ``unwanted`` removed.

    Exits the interpreter via ``sys.exit`` when no rows were returned.
    """
    client = IndeedClient(publisher=2863621289879018)
    progress_bar = pyprind.ProgBar(4, title='Searching For Jobs')

    frames = [pd.DataFrame()]
    for offset in (0, 25, 50, 75):
        page = client.search(
            q=query,
            radius=radius,
            l=location,
            userip="1.2.3.4",
            limit='25',
            useragent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            start=offset)
        frames.append(pd.DataFrame.from_dict(page['results']))
        progress_bar.update()

    results_pd = pd.concat(frames, axis=0)
    if not len(results_pd):
        sys.exit('Search did not return any jobs')

    results_pd = results_pd.reset_index(drop=True)
    results_pd['date'] = pd.to_datetime(results_pd.date)
    unwanted = [
        'source', 'expired', 'country', 'formattedLocation',
        'formattedLocationFull', 'onmousedown', 'stations', 'state',
        'sponsored'
    ]
    # The search results, as a pandas data frame.
    return results_pd.drop(unwanted, axis=1)
Ejemplo n.º 5
0
    def job_search(self, job, location):
        # publisher=5950869068484812
        client = IndeedClient('5950869068484812')

        #params = generate_advanced_query("python", "Boston", 1, 0, 25)
        params = self.generate_advanced_query(job, location, 1, 0, 25)
        search_response = client.search(**params)
        print "Search Response: %s" % search_response

        filename = 'indeed_positions_json.txt'
        self.write_json_to_file(filename, search_response)


        (positions, total) = self.extract_query_result(search_response)
        print total

        jobkeys = []
        for position in positions:
            self.extract_position_info(position, jobkeys)

        #for i in range(len(jobkeys)):
            #print "range (%d: %s)" % (i, jobkeys[i])

            #print '*' * 100
            #job_response = client.jobs(jobkeys = "ad752ce9ae3f1b5e")
            #print job_response['results']
            #print job_response
            #filename = 'indeed_positions_json.txt'
            #self.write_json_to_file(filename, job_response)
        return jobkeys
Ejemplo n.º 6
0
    def search_with_api(self, params: dict):
        """Page through all Indeed hits for ``params`` and store the new jobs.

        A first request is made only to learn ``totalResults``; after that
        one request is issued per page of
        ``IndeedConstants.API.MAX_NUM_RESULTS_PER_REQUEST`` results.  The page
        count is rounded up so that a trailing partial page (and a result set
        smaller than one page) is fetched as well; truncating division used
        to drop those jobs.

        Args:
            params: Indeed search parameters.  ``params['start']`` is
                overwritten in place with the offset of each page.

        Jobs are inserted with ``Job.create``.  ``peewee.IntegrityError``
        whose message contains ``'UNIQUE'`` (job already stored) is ignored;
        any other integrity error is printed and the loop continues.
        """
        client = IndeedClient(publisher=self.user_config.INDEED_API_KEY)
        search_response = client.search(**params)

        total_number_hits = search_response['totalResults']
        page_size = IndeedConstants.API.MAX_NUM_RESULTS_PER_REQUEST
        # Ceiling division: 30 hits at 25 per page need 2 requests, not 1.
        num_loops = (total_number_hits + page_size - 1) // page_size
        counter_start = 0

        print('Total number of hits: {0}'.format(total_number_hits))
        count_jobs_added = 0

        for i in range(0, num_loops):
            # We can get around MAX_NUM_RESULTS_PER_REQUEST by increasing our start location on each loop!
            params['start'] = counter_start

            search_response = client.search(**params)
            list_jobs = IndeedParser.get_jobs_from_response(search_response)
            for job in list_jobs:
                try:
                    # TODO: This sucks, I'm just repeating myself...
                    Job.create(key=job.key,
                               website=job.website,
                               link=job.link,
                               title=job.title,
                               company=job.company,
                               city=job.city,
                               state=job.state,
                               country=job.country,
                               location=job.location,
                               posted_date=job.posted_date,
                               expired=job.expired,
                               easy_apply=job.easy_apply)
                    count_jobs_added += 1

                except peewee.IntegrityError as e:
                    # TODO: Can I write a custom exception that catches UNIQUE Errors but not others?
                    if 'UNIQUE' in str(e):
                        pass
                    else:
                        print(str(e))

            # Increment start
            counter_start += page_size

        print('Added {0} new jobs'.format(count_jobs_added))
Ejemplo n.º 7
0
def indeed_urls(parameters, publisher_key=None):
    """Return the URL of every result of one Indeed search.

    Args:
        parameters: dict of search parameters forwarded to
            ``IndeedClient.search``.
        publisher_key: Indeed publisher ID; read from the ``API_KEY``
            environment variable when ``None``.

    Returns:
        list: one ``str`` URL per entry of the response's ``"results"``.

    Raises:
        NameError: ("Invalid Publisher ID") when the response has no
            ``"results"`` key or a result has no ``"url"`` key.
    """
    key = os.environ["API_KEY"] if publisher_key is None else publisher_key
    response = IndeedClient(key).search(**parameters)
    try:
        urls = []
        for entry in response["results"]:
            urls.append(str(entry["url"]))
    except KeyError:
        raise NameError("Invalid Publisher ID")
    return urls
Ejemplo n.º 8
0
def indeed_urls(parameters, publisher_key=None):
    """Look up jobs on Indeed and return their URLs as a list of strings.

    ``parameters`` is forwarded unchanged to ``IndeedClient.search``.  When
    ``publisher_key`` is ``None`` the publisher ID is taken from the
    ``API_KEY`` environment variable.  A response without a ``'results'``
    key (or a result without ``'url'``) is reported as
    ``NameError('Invalid Publisher ID')``.
    """
    if publisher_key is None:
        publisher_key = os.environ['API_KEY']
    response = IndeedClient(publisher_key).search(**parameters)
    try:
        found = response['results']
        return list(str(item['url']) for item in found)
    except KeyError:
        raise NameError('Invalid Publisher ID')
Ejemplo n.º 9
0
def Search(query, location, limit=10, start=0):
    """Run a single Indeed search and return the client's response unchanged.

    Args:
        query: value for the ``q`` parameter.
        location: value for the ``l`` parameter.
        limit: value for the ``limit`` parameter (default 10).
        start: value for the ``start`` parameter (default 0).
    """
    return IndeedClient(publisher=PUBLISHER_ID).search(
        q=query,
        l=location,
        limit=limit,
        start=start,
        userip="1.2.3.4",
        useragent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)")
Ejemplo n.º 10
0
def Search(query, location, limit=10, start=0):
    """Query Indeed for ``query`` near ``location`` and return the raw response.

    ``limit`` and ``start`` are forwarded as the search parameters of the
    same name; ``userip`` and ``useragent`` are fixed placeholder values.
    """
    request = dict(
        q=query,
        l=location,
        limit=limit,
        start=start,
        userip="1.2.3.4",
        useragent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
    )
    indeed_client = IndeedClient(publisher=PUBLISHER_ID)
    return indeed_client.search(**request)
Ejemplo n.º 11
0
def access_indeed_api(parameters, publisher_key=None):
    """Send one search request to Indeed and return its response.

    Positional argument:
    parameters -- dict of search parameters, forwarded as keyword arguments
                  to ``IndeedClient.search``

    Keyword argument:
    publisher_key -- Indeed publisher key; when None it is read from the
                     ``API_KEY`` environment variable
    """
    key = publisher_key if publisher_key is not None else os.environ['API_KEY']
    return IndeedClient(key).search(**parameters)
class Threadr(object):
    """Run one Indeed search and upload the raw response to an S3 bucket.

    Typical use is ``Threadr(keyword, location).mine_that()``, which chains
    ``connect_indeed`` -> ``parameters`` -> ``job_search`` -> ``send_json``.
    """

    def __init__(self, keyword, location):
        """Open the S3 connection/bucket and remember the search terms.

        Args:
            keyword: search keyword(s) for Indeed.
            location: search location for Indeed.
        """
        self.conn = boto.connect_s3()  # Connecting to S3
        self.bucket = self.conn.get_bucket(
            'bucketofindeeds')  # Accessing the correct bucket
        self.json_up = Key(self.bucket)  # Named in send_json before upload.
        self.content_up = Key(self.bucket)  # Never named or used in this class.
        self.keyword = keyword
        self.location = location
        print('init done')

    def connect_indeed(self,
                       config_filepath='indeed_cred.yml'):  # Store in .ssh
        """Create the Indeed client and store it on ``self.c``.

        ``config_filepath`` is currently unused: the publisher number is
        hard-coded rather than read from the YAML credentials file.
        """
        self.c = IndeedClient(publisher='4353162753214099')
        print('connect_indeed done')

    def parameters(self, keyword,
                   location):  # Make sure to try using multiple keywords
        """Build the search parameters and store them on ``self.params``.

        The caller's public IP is looked up over HTTP and a random user
        agent is drawn from ``UserAgent``.
        """
        ua = UserAgent(fallback='Your favorite Browser')
        # icanhazip answers with the address followed by a newline; strip it
        # so the trailing whitespace is not sent to Indeed as part of userip.
        public_ip = requests.get("http://icanhazip.com").text.strip()
        self.params = {
            'q': str(keyword),
            'l': str(location),
            'userip': public_ip,
            'useragent': ua.random
        }
        print('parameters done')

    def job_search(self):
        """Run the search with ``self.params`` and keep it on ``self.response``."""
        self.response = self.c.search(**self.params)
        # The response is indexed like a dict below and in send_json.
        print(len(self.response['results']), 'jobs returned.')

    def send_json(self):
        """Upload ``str(self.response)`` plus a newline to ``indeed_jsons/test``."""
        self.json_up.key = 'indeed_jsons/test'
        self.json_up.set_contents_from_string(str(self.response) + '\n')
        print('Its Working.')

    def mine_that(self):
        """Connect, build parameters, search and upload, in that order."""
        self.connect_indeed()
        self.parameters(self.keyword, self.location)
        self.job_search()
        self.send_json()
Ejemplo n.º 13
0
def get_job_description(input_skills):
    """Return the combined ``text_cleaner`` output for one page of Indeed results.

    Args:
        input_skills: value for the ``q`` search parameter.

    Returns:
        list: the items produced by ``text_cleaner`` for every result URL
        (at most 25 results), concatenated in result order.
    """
    response = IndeedClient('7863709885041358').search(
        q=input_skills,
        userip="1.2.3.4",
        useragent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        limit=25)

    # All URLs are collected before any of them is processed.
    job_urls = [posting['url'] for posting in response['results']]

    bunch_of_words = []
    for link in job_urls:
        bunch_of_words += list(text_cleaner(link))
    return bunch_of_words
Ejemplo n.º 14
0
def main():
    # publisher=5950869068484812
    client = IndeedClient('5950869068484812')

    params = generate_advanced_query("python", "Boston", 10, 0, 25)
    search_response = client.search(**params)
    #print search_response

    #filename = 'indeed_positions_json.txt'
    # write_json_to_file(filename, search_response)

    (positions, total) = extract_query_result(search_response)
    print total

    jobkeys = []
    for position in positions:
        extract_position_info(position, jobkeys)

    for i in range(len(jobkeys)):
        print jobkeys[i]
Ejemplo n.º 15
0
def fetch_indeed_data(counties,search): 
    """Run the same Indeed search once per county and return all responses.

    Args:
        counties: iterable of location strings, each used as the ``l``
            search parameter.
        search: value for the ``q`` search parameter.

    Returns:
        list: the raw ``client.search`` response for each county, in order.
    """
    from indeed import IndeedClient

    client = IndeedClient('6437444271691851')
    shared_query = dict(
        q=search,
        userip="1.2.3.4",
        useragent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        latlong=1,
        radius=10,
        fromage=7,
        limit=25,
    )
    return [client.search(l=county, **shared_query) for county in counties]
Ejemplo n.º 16
0
def main():
    client = IndeedClient(PUB_ID)
    search_params = build_params(locations, JOB_QUERY)

    search_results = []
    count = 1
    for params in search_params:
        stdout.flush()
        stdout.write("\rHtml request: {}/{}".format(count, len(locations)))
        search_response = client.search(**params)
        search_results.append(search_response)
        count += 1

    word_filter = ['and', 'to', 'the', 'of', 'a', 'in', 'with', 'you', 'on', 'that', 'are', 'will', 'is', 'your', 'for',
                   'we', 'from', 'an', 'be', 'have', 'or', 'just', 'can', 'also', 'how', 'at', 'as', 'do', 'other',
                   'should', 'what', 'us', 'this', 'it', 'if', 'get', '-', '&', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
                   'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

    count = 1
    number_of_locations = len(search_results)
    word_map = Counter()

    for search in search_results:
        print "Currently on {}/{}".format(count, number_of_locations)
        if len(search['results']) == 0:
            print "Nothing found for: {}".format(search['location'])
        else:
            print "Attempting {}...".format(search['location'])
        for job in search['results']:
            url = job['url']
            html = requests.get(url)
            word_list = pull_job_description(html.content)

            for word in word_list:
                if word.lower() not in word_filter:
                    word_map[word.lower()] += 1
        count += 1

        save_to_file(OUTPUT_FILE, word_map)
Ejemplo n.º 17
0
    def scrape_indeed(self, api_key, ip_address, places=None):
        """Return unseen Indeed jobs whose titles pass the configured filters.

        Every place is searched once per key of ``self.filters`` (25 results
        per search).  Job keys already present in the set loaded with
        ``self.load_titles('indeed_jobs')`` are skipped; new keys are added
        to it and the set is saved again at the end.

        Args:
            api_key: Indeed publisher key.
            ip_address: sent as the ``userip`` search parameter.
            places: locations to search; defaults to
                ``['san francisco, ca']`` when falsy.

        Returns:
            list: ``[title, formattedLocationFull, url, snippet]`` per match.
        """
        indeed_client = IndeedClient(api_key)
        matched = []
        seen = self.load_titles('indeed_jobs')

        places = places or ['san francisco, ca']

        for place in places:
            for term in list(self.filters.keys()):
                sys.stderr.write('Searching {} Indeed for {}... '.format(
                    place, term))
                response = indeed_client.search(**{
                    'q': term,
                    'l': place,
                    'userip': ip_address,
                    'useragent':
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
                    'limit': 25,
                })
                found = response['results']
                sys.stdout.write('returned {} items\n'.format(len(found)))

                for item in found:
                    item_key = item['jobkey']
                    if item_key in seen:
                        continue
                    seen.add(item_key)
                    item_title = item['jobtitle']
                    if self.filter_title(item_title, self.filters[term]):
                        matched.append([
                            item_title,
                            item['formattedLocationFull'],
                            item['url'],
                            item['snippet'],
                        ])

        self.save_titles('indeed_jobs', seen)
        return matched
Ejemplo n.º 18
0
def fullmap():
    """Render a map of Indeed "python" jobs in Kharkiv around the caller's location.

    The current position comes from ``gmaps.geolocate()`` and gets a blue
    marker.  Each job's company and city are looked up with the Google
    Places text search, and every place returned gets a red marker whose
    info box shows company, address and job snippet.

    Returns:
        The rendered ``example_fullmap.html`` template.
    """
    gmaps = googlemaps.Client(key="AIzaSyAx1j38VITDr2p2-VclAyX8pSOp7C_1-kM")
    lctn = gmaps.geolocate()

    indeed_client = IndeedClient('1905750874242217')
    search_response = indeed_client.search(
        q="python",
        l="Kharkiv",
        userip="1.2.3.4",
        useragent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        co="UA",
        latlong=1,
        start=0,
        limit=25)
    jobs = json_normalize(search_response['results'])

    origin = lctn['location']
    origin_lat = origin['lat']
    origin_lng = origin['lng']

    # First marker: where the caller is.
    jobs_markers = [{
        'icon': '//maps.google.com/mapfiles/ms/icons/blue-dot.png',
        'lat': origin_lat,
        'lng': origin_lng,
        'infobox': "My Location"
    }]

    for _, job_row in jobs.iterrows():
        lookup = gmaps.places(
            query=job_row['company'] + ' ' + job_row['city'],
            location=str(origin_lat) + ',' + str(origin_lng))
        candidates = json_normalize(lookup['results'])
        for _, place in candidates.iterrows():
            marker = {
                'icon': '//maps.google.com/mapfiles/ms/icons/red-dot.png',
                'lat': place['geometry.location.lat'],
                'lng': place['geometry.location.lng'],
                'infobox': (job_row['company'] + ' - ' +
                            place['formatted_address'] + ' snippet:' +
                            job_row['snippet']),
            }
            jobs_markers.append(marker)

    css = ("height:70%;"
           "width:99%;"
           "top:50;"
           "left:10;"
           "position:absolute;"
           "z-index:200;")
    job_map = Map(
        identifier="fullmap",
        varname="fullmap",
        style=css,
        lat=origin_lat,
        lng=origin_lng,
        markers=jobs_markers,
        zoom="11",
        fit_markers_to_bounds=True)
    return render_template('example_fullmap.html',
                           fullmap=job_map,
                           GOOGLEMAPS_KEY=request.args.get('apikey'))
Ejemplo n.º 19
0
for loc in locations:
    params['l'] = loc
    print "Nuova Location ", loc

    for jt in job_titles:
        params['q'] = jt
        print "Inizio ricerca per ", loc, jt
        check = False

        for i in range(
                25, 1025,
                25):  # il secondo parametro va impostato = al n di offerte tot
            params['limit'] = i
            params['start'] = i - 25
            client.search(**params)
            request = client.search(**params)
            #storage.append(request)
            print "Richiesta: ", loc, jt, ". Ciclo: ", i, ", numero restituite: ", len(
                request['results'])

            if (len(request['results']) < 25):
                if (check):
                    break
                else:
                    check = True

            num_errors = 0
            #for block in storage:
            for job in request['results']:
                data_job = (job['formattedRelativeTime'], job['city'],
Ejemplo n.º 20
0
class indeed:
    """Indeed job search helper with a simple "similar jobs" ranking.

    ``skill`` / ``skillOR`` search Indeed, keep the de-duplicated results in
    ``self.jobDataFrame`` and write them to ``sample.csv``.  ``similarJobs``
    reads that CSV back and ranks the other jobs against one job key.
    """

    _PAGE_SIZE = 25  # offset step between consecutive requests
    _MAX_RESULTS = 100  # stop paging once this many offsets were covered

    def __init__(self):
        self.client = IndeedClient(8836246992678581)

    @staticmethod
    def _normalise_jobtype(jobtype):
        """Map any internship spelling to 'internship', everything else to 'fulltime'."""
        if jobtype in ['intern', 'internship', 'Internship']:
            return 'internship'
        return 'fulltime'

    def _collect_results(self, params):
        """Page through the search described by ``params`` and cache the results.

        ``params`` must contain ``'start'``; it is advanced in place by
        ``_PAGE_SIZE`` per extra request.  Returns ``[]`` without touching
        ``self.jobDataFrame`` or ``sample.csv`` when the first page is empty.
        """
        search_response = self.client.search(**params)
        # Keep the first page: it used to be stored only inside the paging
        # loop, so a single-page search returned nothing and intermediate
        # pages were appended twice.
        results = list(search_response['results'])
        if len(results) <= 0:
            return results

        fetched = self._PAGE_SIZE
        while (fetched < self._MAX_RESULTS
               and fetched < search_response['totalResults']):
            params['start'] += self._PAGE_SIZE
            search_response = self.client.search(**params)
            results += search_response['results']
            fetched += self._PAGE_SIZE
            print(params['start'])

        self.jobDataFrame = pd.DataFrame(results).drop_duplicates('jobkey')
        self.jobDataFrame.to_csv("sample.csv", encoding='UTF-8')
        return results

    def skill(self, l, city, jobtype):
        """Search for jobs matching ALL terms in ``l`` (joined with " AND ").

        Args:
            l: list of query terms.
            city: value for the ``l`` (location) search parameter.
            jobtype: any of 'intern'/'internship'/'Internship' selects
                internships; every other value selects full-time jobs.

        Returns:
            list: the result dicts of every fetched page, in page order.
        """
        print(jobtype)
        params = {
            'q': " AND ".join(l),
            'l': city,
            'jt': self._normalise_jobtype(jobtype),
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            'limit': "25",
            'start': 0,
            'highlight': 1
        }
        return self._collect_results(params)

    def skillOR(self, l, city, jobtype):
        """Search for jobs matching ANY term in ``l`` (joined with " OR ").

        Same arguments and return value as ``skill``.
        """
        print(jobtype)
        params = {
            'q': " OR ".join(l),
            'l': city,
            'jt': self._normalise_jobtype(jobtype),
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            # NOTE(review): limit is 50 while paging steps by 25, so pages
            # may overlap if the API honours it; duplicates are dropped from
            # jobDataFrame but not from the returned list -- confirm intent.
            'limit': "50",
            # 'start' was missing, which made the paging step raise KeyError.
            'start': 0
        }
        return self._collect_results(params)

    def similarJobs(self, job):
        """Return the ten jobs from ``sample.csv`` most similar to ``job``.

        Args:
            job: job key (str) of the reference job; it must be present in
                ``sample.csv``.

        Returns:
            list: up to ten record dicts (all original CSV columns), best
            match first.

        Raises:
            IndexError: if no row of ``sample.csv`` has this job key.

        The score is: same city + same company + country + expired +
        indeedApply + 10 * snippet similarity + 5 * title similarity, where
        the similarities come from ``textSim.cosine_sim``.
        """
        print("the job is" + job)
        sampledfo = pd.read_csv("sample.csv", encoding='UTF-8')
        sampledf = sampledfo.copy()
        del sampledf['stations']
        del sampledf['Unnamed: 0']
        del sampledf['source']
        del sampledf['onmousedown']
        del sampledf['formattedLocation']
        del sampledf['formattedLocationFull']
        del sampledf['url']
        del sampledf['date']
        del sampledf['formattedRelativeTime']
        # NOTE(review): these compare against the string 'false'; if
        # read_csv parses the columns as booleans every row becomes 1 --
        # confirm against a real sample.csv.
        sampledf['indeedApply'] = [
            0 if x == 'false' else 1 for x in sampledf['indeedApply']
        ]
        sampledf['expired'] = [
            0 if x == 'false' else 1 for x in sampledf['expired']
        ]
        sampledf['sponsored'] = [
            0 if x == 'false' else 1 for x in sampledf['sponsored']
        ]
        jobNo = job
        self.dataJob = sampledf.loc[sampledf['jobkey'] == jobNo]
        # Explicit copy so the assignments below write to an independent frame.
        df = sampledf[sampledf["jobkey"] != jobNo].copy()
        # .loc replaces DataFrame.ix, which no longer exists in pandas.
        # Rows in the same city get 1 in city/country/state, the rest 0.
        df.loc[df.city == self.dataJob.city.iloc[0],
               ['city', 'country', 'state']] = 1
        df.loc[df.city != 1, ['city', 'country', 'state']] = 0
        df.loc[df.company == self.dataJob.company.iloc[0], ['company']] = 1
        df.loc[df.company != 1, ['company']] = 0

        df['snippet'] = [
            textSim.cosine_sim(x, self.dataJob.snippet.iloc[0])
            for x in df['snippet']
        ]
        df['jobtitle'] = [
            textSim.cosine_sim(x, self.dataJob.jobtitle.iloc[0])
            for x in df['jobtitle']
        ]

        df['variance'] = df['city'] + df['company'] + df['country'] + df[
            'expired'] + df[
                'indeedApply'] + 10 * df['snippet'] + 5 * df['jobtitle']

        # sort_values replaces DataFrame.sort, which no longer exists in pandas.
        result = df.sort_values(['variance'], ascending=False)
        simList = result['jobkey'][:10].tolist()
        simDict = []
        for x in simList:
            s = sampledfo.loc[sampledfo['jobkey'] == x]
            simDict.append(s.to_dict(orient='records')[0])
        return simDict
Ejemplo n.º 21
0
def indeedAPI(defTask):
    params = {}
    params['userip'] = "1.2.3.4",
    params['useragent'] = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0)"
    params['start'] = 1
    params['latlong'] = 1
    params['as_ttl'] = ''
    params['limit'] = 25
    params['fromage']='any'
    params['radius'] = 0
    params['q'] =  "grants+management"
    params['highlight'] = 0
    params['jobtitle'] = ''
    compincr = 25
    complevels = 11
    indeedapi = IndeedClient(publisher='7423517030312598')
    print
    print 'START:',str(time.asctime(time.localtime()))
    newJobs = 0
    expiredJobs = 0
    states=["AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"]
    oldlist=[]
    newlist=[]
    runStart = time.time()
    for job in db.fluxxJobs.find():
        oldlist.append(job['jobkey'])
    for state in states:
        print state,
        params['l'] = state
        for c in range(complevels):
            params['salary'] = "$" + str(c * 25) + "K-$" + str(((c+1) * 25)-1) + "K"
            if c == (complevels-1):
                params['salary'] = "$" + str(c*compincr) + "K"
            sr = indeedapi.search(**params)
            tr = sr['totalResults']
            ps = params['salary'].replace("$","")
            for apirequests in range((tr/compincr)+1):
                params['start'] = (apirequests * compincr)
                sr = indeedapi.search(**params)
                for joblisting in sr['results']:
                    jobListing = json.loads(json.dumps(joblisting))
                    newlist.append(jobListing['jobkey'])
                    if joblisting['jobkey'] not in oldlist:
                        newJobs += 1
                        listed = joblisting['date'].replace('GMT','UTC')
                        joblisting['dateOpen'] = datetime.strftime(datetime.strptime(listed, '%a, %d %b %Y %H:%M:%S %Z'), '%Y%m%d %H:%M:%S')
                        joblisting['datesOpen'] = timeDictStamp('')
                        joblisting['_id'] = joblisting['jobkey']
                        joblisting['status'] = 'Open'
                        joblisting['searchparams'] = params
                        joblisting['searchparams']['procTime']=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                        joblisting['searchparams']['totalResults'] = tr
                        joblisting['compMin'] = c*25000
                        joblisting['compMax'] = (c+1) * 25000
                        joblisting['compRange'] = params['salary']
                        if joblisting['city'] == "":
                            del joblisting['city']
                        if joblisting['state'] == "":
                            del joblisting['state']
                        job = joblisting
                        jobID=job['_id']
                        Title=job['jobtitle'][0:60].replace("'","")
                        Company=job['company'].replace("'","").encode('latin-1','ignore')
                        if 'source' in job:
                            Source=job['source'].replace("'","")
                        else:
                            Source = ''
                        Description=job['snippet'][0:250].replace("'","").encode('latin-1','ignore').replace("'","")
                        Description = cleanup(Description,{'<b>':'','</b>':'','<B>':'','</B>':''})
                        listed = job['date'].replace('GMT','UTC')
                        job['dateListed'] = datetime.strftime(datetime.strptime(listed, '%a, %d %b %Y %H:%M:%S %Z'), '%Y%m%d %H:%M:%S')
                        CompRange_min=job['compMin']
                        CompRange_max=job['compMax']
                        CompRange=job['compRange']
                        textURL = "http://www.indeed.com/viewjob?jk=" + job['jobkey']
                        jd = {}
                        jd['jdText'] = alchemy(textURL,'URLGetText','text')
                        jd['jdConcepts'] = alchemy(textURL,'URLGetRankedConcepts','concepts')
                        jd = json.loads(json.dumps(jd))
                        job['jobDescription'] = jd
                        if not db.fluxxJobs.find_one({'jobkey':job['jobkey']}):
                            db.grantsJobs.save(job)
    delisted = set(oldlist).difference(set(newlist))
    for jobkey in delisted:
        expiredJobs+=1
        rightnow = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        upResult = db.grantsJobs.update({'jobkey':jobkey},{'$set':{'dateClosed':rightnow,'datesClosed':timeDictStamp(),'status':'Closed','expired':'true'}})
    print " "
    print 'FINISH:',str(time.asctime(time.localtime()))
    print '=================================================================================================='
Ejemplo n.º 22
0
import time

client = IndeedClient(publisher='')

# Search parameters: internships within 50 km of Zurich on the Swiss site,
# newest first.
params = {
    'q': "internship",
    'l': "Zurich",
    'userip': "1.2.3.4",
    'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
    'radius': 50,
    'limit': 100,
    'co': 'ch',
    'sort': 'date'
}

search_response = client.search(**params)

# Sample the clock once so the date in the file name and the time written in
# the header always belong to the same instant.
now = time.localtime()
filename = 'jobs_' + str(now[0]) + str(now[1]) + str(now[2]) + '.txt'

# The ``with`` block closes the file; no explicit close() is needed afterwards.
with open(r'export path' + filename, 'w') as textfile:
    textfile.write('acquisition time: ' + str(now[3]) + ':' + str(now[4]) +
                   '\n\n')
    # Iterate over the postings themselves. The response is a dict, so its
    # len() is the number of top-level keys, not the number of results.
    for result in search_response['results']:
        reltime = result['formattedRelativeTime']
        jobtitle = result['jobtitle']
        company = result['company']
        url = result['url']
        textfile.write(reltime + '\t' + jobtitle + '\t company: ' + company +
                       '\n' + url + '\n\n')
Ejemplo n.º 23
0
# Script fragment: walk every US state and salary band through the Indeed
# search API and annotate postings whose jobkey is not yet known.
# `params`, `complevels`, `compincr`, `indeedapi`, `db`, `json`, `datetime`
# and `timeDictStamp` are not defined in this fragment and must already be
# in scope.
newJobs = 0
expiredJobs = 0
states=["AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"]
# oldlist: jobkeys already stored in db.fluxxJobs.
# newlist: every jobkey returned by the API during this run.
oldlist=[]
newlist=[]
runStart = time.time()
for job in db.fluxxJobs.find():
    oldlist.append(job['jobkey'])
for state in states:
    # Python 2 print statement; the trailing comma suppresses the newline.
    print state,
    params['l'] = state
    for c in range(complevels):
        # Closed salary band, e.g. "$0K-$24K", "$25K-$49K", ...
        params['salary'] = "$" + str(c * 25) + "K-$" + str(((c+1) * 25)-1) + "K"
        # The last band has no upper bound and is rebuilt from compincr.
        if c == (complevels-1):
            params['salary'] = "$" + str(c*compincr) + "K"
        # First request of the band is only used to read the total hit count.
        sr = indeedapi.search(**params)
        tr = sr['totalResults']
        ps = params['salary'].replace("$","")
        # One request per page of compincr results.
        # NOTE(review): relies on Python 2 integer division; under Python 3
        # `tr/compincr` is a float and range() would raise - confirm target.
        for apirequests in range((tr/compincr)+1):
            params['start'] = (apirequests * compincr)
            sr = indeedapi.search(**params)
            for joblisting in sr['results']:
                # JSON round-trip yields an independent copy of the posting.
                jobListing = json.loads(json.dumps(joblisting))
                newlist.append(jobListing['jobkey'])
                # NOTE(review): `oldlist` is a list, so this membership test
                # is linear in the number of stored jobs.
                if joblisting['jobkey'] not in oldlist:
                    newJobs += 1
                    # Rename the zone so the %Z directive below accepts it.
                    listed = joblisting['date'].replace('GMT','UTC')
                    # Re-format the posting date as 'YYYYMMDD HH:MM:SS'.
                    joblisting['dateOpen'] = datetime.datetime.strftime(datetime.datetime.strptime(listed, '%a, %d %b %Y %H:%M:%S %Z'), '%Y%m%d %H:%M:%S')
                    joblisting['datesOpen'] = timeDictStamp()
                    # The jobkey is reused as the document's `_id`.
                    joblisting['_id'] = joblisting['jobkey']
                    joblisting['status'] = 'Open'
Ejemplo n.º 24
0
def indeedAPI2(defTask):
    """Sync Indeed postings for a fixed list of organisations into MongoDB.

    For every organisation in the hard-coded list, the Indeed search API is
    queried once per salary band (eleven bands of 25K, the last one
    open-ended) and paged through ``compincr`` results at a time.  Postings
    whose jobkey is not yet present in ``db.fluxxJobs`` are annotated with
    open dates, the search parameters that found them, the salary band and
    the text/concepts returned by ``alchemy`` for the posting URL, and are
    then saved to ``db.grantsJobs``.  Finally, every jobkey that is stored in
    ``db.fluxxJobs`` but was not returned during this run is marked as closed
    in ``db.grantsJobs``.

    Args:
        defTask: Accepted for interface compatibility; not used.

    Relies on module-level ``db``, ``IndeedClient``, ``alchemy``,
    ``timeDictStamp``, ``json`` and ``time``.
    """
    # Imported locally so that every ``datetime.<method>`` call below refers
    # to the class, regardless of how the module itself imports ``datetime``.
    from datetime import datetime

    indeed_date_format = '%a, %d %b %Y %H:%M:%S %Z'
    stored_date_format = '%Y%m%d %H:%M:%S'

    params = {}
    # Must be a plain string; a trailing comma here would make it a 1-tuple.
    params['userip'] = "1.2.3.4"
    params['useragent'] = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0)"
    params['start'] = 1
    params['latlong'] = 1
    params['as_ttl'] = ''
    params['limit'] = 25
    params['fromage'] = 'any'
    params['radius'] = 0
    params['q'] = ''
    params['highlight'] = 0
    params['jobtitle'] = ''
    compincr = 25
    complevels = 11
    indeedapi = IndeedClient(publisher='7423517030312598')
    print(params)
    print('START: ' + str(time.asctime(time.localtime())))
    companies = ['Climate and Land Use Alliance','John S. and John L. Knight Foundation','Cynthia and George Mitchell Foundation','Atlantic Philanthropies','Council of State Governments','Leukemia & Lymphoma Society','John D and Catherine T. MacArthur Foundation','Unbound Philanthropy','Garfield Foundation','Freedom House','Wikimedia Foundation','AFDO','Getty Foundation','Altman Foundation','Colorado Trust','Jessie Ball duPont Fund','Arthur Vining Davis Foundations','The Christensen Fund','Rita Allen Foundation','NBA Legends','Trio Foundation of St. Louis','Surdna Foundation','Kresge Foundation','Carnegie Corporation of New York','Central Valley Community Foundation','Democracy Fund','Committee to Protect Journalists','American Cancer Society','Winthrop Rockefeller Foundation','Walter and Elise Haas Fund','ClimateWorks Foundation','Zellerbach Family Foundation','Hillman Family Foundations','Bosch Community Fund','The Scan Foundation','Hogg Foundation','Unitarian Universalist Service Committee','Whole Foods Market','Open Road Foundation','Max M. & Marjorie S. Fisher Foundation','ArtPlace America','Grace and Mercy Foundation','Alliance for Early Success','The New York Womens Foundation','DentaQuest','ECMC Foundation','Great Lakes Higher Education Guaranty','The J. Willard and Alice S. Marriott Foundation','Indiana Historical Society','Wallace H. Coulter Foundation']
    print(params)

    # Sets give constant-time membership tests and a direct set difference
    # for the delisting step at the end.
    known_keys = set(job['jobkey'] for job in db.fluxxJobs.find())
    seen_keys = set()

    for company in companies:
        print(company)
        params['company'] = company
        for c in range(complevels):
            if c == complevels - 1:
                # Last band is open-ended.
                params['salary'] = "$" + str(c * compincr) + "K"
            else:
                params['salary'] = "$" + str(c * 25) + "K-$" + str(((c + 1) * 25) - 1) + "K"
            # First request of the band is only used to read the hit count.
            sr = indeedapi.search(**params)
            tr = sr['totalResults']
            # Floor division keeps the page count an int on Python 2 and 3.
            for apirequests in range((tr // compincr) + 1):
                params['start'] = apirequests * compincr
                sr = indeedapi.search(**params)
                for job in sr['results']:
                    seen_keys.add(job['jobkey'])
                    if job['jobkey'] in known_keys:
                        continue
                    listed = job['date'].replace('GMT', 'UTC')
                    listed_stamp = datetime.strftime(
                        datetime.strptime(listed, indeed_date_format),
                        stored_date_format)
                    job['dateOpen'] = listed_stamp
                    job['datesOpen'] = timeDictStamp('')
                    job['_id'] = job['jobkey']
                    job['status'] = 'Open'
                    # Store a copy: annotating the live ``params`` dict would
                    # send procTime/totalResults to the API on later requests.
                    searchparams = dict(params)
                    searchparams['procTime'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    searchparams['totalResults'] = tr
                    job['searchparams'] = searchparams
                    job['compMin'] = c * 25000
                    job['compMax'] = (c + 1) * 25000
                    job['compRange'] = params['salary']
                    # Empty location fields are dropped rather than stored.
                    if job['city'] == "":
                        del job['city']
                    if job['state'] == "":
                        del job['state']
                    job['dateListed'] = listed_stamp
                    textURL = "http://www.indeed.com/viewjob?jk=" + job['jobkey']
                    jd = {}
                    jd['jdText'] = alchemy(textURL, 'URLGetText', 'text')
                    jd['jdConcepts'] = alchemy(textURL, 'URLGetRankedConcepts', 'concepts')
                    # JSON round-trip normalises the alchemy output to plain
                    # JSON-compatible types before it is stored.
                    job['jobDescription'] = json.loads(json.dumps(jd))
                    # NOTE(review): existence is checked in fluxxJobs but the
                    # document is written to grantsJobs - confirm intended.
                    if not db.fluxxJobs.find_one({'jobkey': job['jobkey']}):
                        db.grantsJobs.save(job)

    # Anything stored before this run that the API no longer returns is closed.
    for jobkey in known_keys - seen_keys:
        rightnow = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # NOTE(review): timeDictStamp is called with '' above and without
        # arguments here - confirm which signature is correct.
        db.grantsJobs.update({'jobkey': jobkey}, {'$set': {'dateClosed': rightnow, 'datesClosed': timeDictStamp(), 'status': 'Closed', 'expired': 'true'}})
    print(" ")
    print('FINISH: ' + str(time.asctime(time.localtime())))
    print('==================================================================================================')
Ejemplo n.º 25
0
def search(params):
    """Run an Indeed job search and return the client's response.

    ``params`` is a mapping that is expanded into the keyword arguments of
    ``IndeedClient.search``.  A new client is created on every call.
    """
    return IndeedClient(publisher=8201417039877332).search(**params)
Ejemplo n.º 26
0
class TestSearch:
    """Integration tests for ``IndeedClient.search`` against the live API."""

    def setup(self):
        """Create a client, a minimal valid parameter set and the helpers."""
        self.client = IndeedClient('8251007850639120')
        self.params = {
            'q': "python",
            'l': "austin",
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        }
        self.utils = Utils()

    def teardown(self):
        """Drop the per-test client and parameters."""
        self.client = None
        self.params = None

    @with_setup(setup, teardown)
    def test_search(self):
        """A basic search returns a dict, which is also written to a file."""
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)
        self.utils.output_to_file('sample', search_response)

    @with_setup(setup, teardown)
    def test_missing_one_required(self):
        """Leaving out the location alone is still a valid search."""
        del self.params['l']
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)

    @with_setup(setup, teardown)
    @raises(IndeedClientException)
    def test_missing_both_required(self):
        """Leaving out both query and location raises."""
        del self.params['q']
        del self.params['l']
        self.client.search(**self.params)

    @with_setup(setup, teardown)
    @raises(IndeedClientException)
    def test_missing_userip(self):
        """Leaving out ``userip`` raises."""
        del self.params['userip']
        self.client.search(**self.params)

    @with_setup(setup, teardown)
    @raises(IndeedClientException)
    def test_missing_useragent(self):
        """Leaving out ``useragent`` raises."""
        del self.params['useragent']
        self.client.search(**self.params)

    @with_setup(setup, teardown)
    def test_raw_json(self):
        """``raw=True`` returns the undecoded JSON text."""
        self.params['raw'] = True
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, basestring)
        assert isinstance(json.loads(search_response), dict)

    @with_setup(setup, teardown)
    def test_raw_xml_with_paramter(self):
        """``format='xml'`` with ``raw=True`` returns parseable XML text."""
        self.params['format'] = "xml"
        self.params['raw'] = True
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, basestring)
        assert parseString(search_response)

    @with_setup(setup, teardown)
    def test_raw_xml_without_paramter(self):
        """``format='xml'`` alone also returns parseable XML text."""
        self.params['format'] = "xml"
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, basestring)
        assert parseString(search_response)

    # Additional tests beyond the ones that ship with the client.

    @with_setup(setup, teardown)
    def test_search_extra(self):
        """Default search: every job matches city/country/language, 10 jobs."""
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)
        find_mismatches = self.utils.find_all_jobs_not_contains_job_parameter
        assert len(find_mismatches(search_response, 'city', 'austin')) == 0
        assert len(find_mismatches(search_response, 'country', 'US')) == 0
        assert len(find_mismatches(search_response, 'language', 'en')) == 0
        assert self.utils.get_num_jobs(search_response) == 10

    @with_setup(setup, teardown)
    def test_sort(self):
        """``sort`` is accepted."""
        self.params['sort'] = "date"
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)

    @with_setup(setup, teardown)
    def test_start(self):
        """``start`` is accepted."""
        self.params['start'] = "2"
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)

    # Defined once: a second method with the same name would silently replace
    # this one in the class namespace.
    @with_setup(setup, teardown)
    def test_limit(self):
        """``limit=25`` yields exactly 25 jobs."""
        self.params['limit'] = "25"
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)
        assert self.utils.get_num_jobs(search_response) == 25

    @with_setup(setup, teardown)
    def test_fromage(self):
        """``fromage`` is accepted."""
        self.params['fromage'] = "2"
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)

    @with_setup(setup, teardown)
    def test_highlight(self):
        """``highlight`` is accepted."""
        self.params['highlight'] = "1"
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)

    @with_setup(setup, teardown)
    def test_duplicate(self):
        """``duplicate`` is accepted."""
        self.params['duplicate'] = "1"
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)

    @with_setup(setup, teardown)
    def test_co(self):
        """A non-US country code with a matching location is accepted."""
        self.params['co'] = "ca"
        self.params['l'] = "toronto"
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)

    @with_setup(setup, teardown)
    def test_invalid_limit(self):
        """A negative ``limit`` yields no jobs."""
        self.params['limit'] = '-100'
        search_response = self.client.search(**self.params)
        assert self.utils.get_num_jobs(search_response) == 0

    # Open question: a range of other invalid parameters (negative fromage,
    # strings where ints are expected and vice versa) raise no error and
    # appear to be ignored. Is this correct?

    @with_setup(setup, teardown)
    def test_several_params(self):
        """Several optional parameters combined still honour ``limit``."""
        self.params['co'] = "ca"
        self.params['l'] = "toronto"
        self.params['duplicate'] = "1"
        self.params['highlight'] = "1"
        self.params['limit'] = "25"
        self.params['fromage'] = "10"
        self.params['start'] = "2"
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, dict)
        assert self.utils.get_num_jobs(search_response) == 25
Ejemplo n.º 27
0
# Script fragment: query Indeed for Python jobs in Palo Alto, print the
# response, dump it to a text file and start walking the result list.
# `sessionmaker`, `engine`, `IndeedClient` and `json` must already be in scope.

# Create a database session (not used in the visible part of this fragment).
Session = sessionmaker(bind=engine)
session = Session()

# publisher=5950869068484812
client = IndeedClient('5950869068484812')
# Up to 50 results requested, newest first, starting at offset 0.
params = {
    'q': "python",
    'l': "Palo Alto",
    'userip': "168.159.213.210",
    'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4)",
    'limit': "50",
    'sort': "date",
    'start': "0"
}
search_response = client.search(**params)
# Python 2 print statement.
print search_response
# print search_response['results']
# Use an online JSON editor to view the result:
# http://www.jsoneditoronline.org/

# Persist the whole response as JSON.
with open('indeed_positions_json.txt', 'w') as outfile:
    # NOTE(review): json.dump writes to `outfile` and returns None, so
    # `jobs` does not hold the data afterwards.
    jobs = json.dump(search_response, outfile)
    #jobs = json.load(search_response)

# Walk the top-level keys and pick out the list stored under "results"
# (dict.iteritems is Python 2 only).
for key, value in search_response.iteritems():
    #print "%s: %s" % (key, value)
    if key == "results":
        res = value
        # NOTE(review): the loop body is missing in this fragment.
        for index in value:
            #print i
Ejemplo n.º 28
0
    def get_api_results(self, desired_result_count=1):
        """Return job JSON objects from the Indeed API.

        The job profile supplies the locations, the search radius and the
        keyword query.  Locations are searched in order, 25 results per
        request, until ``desired_result_count`` jobs have been collected or
        every location has been exhausted.

        Args:
            desired_result_count: Maximum number of jobs to return.

        Returns:
            A single job object when ``desired_result_count`` is 1 and at
            least one job was found; otherwise a list with at most
            ``desired_result_count`` jobs (empty when nothing was found).

        Raises:
            ValueError: If the publisher id cannot be loaded from
                ``API_KEYS_PATH`` or the client cannot be created.
        """
        job_profile = CommonFuncs.get_job_profile()

        # Locations listed in the job profile.
        locations = CommonFuncs.get_locations_list(job_profile)

        # Keywords connected by OR.
        query_list = CommonFuncs.build_query_string(job_profile=job_profile,
                                                    or_delim='or',
                                                    bracket1='(',
                                                    bracket2=')',
                                                    adv_supp=True)
        query_string = query_list[0]

        limit = '25'  # 25 is the max results per request
        lookback_period = '60'  # default lookback period

        # Connect to the Indeed API.  Fail loudly here: continuing without a
        # client would only surface later as an AttributeError on ``None``.
        try:
            with open(API_KEYS_PATH, 'r') as key_file:
                client_id = json.load(key_file)
            api = IndeedClient(publisher=client_id['publisher_id'])
        except Exception as err:
            raise ValueError(
                'No publisher id found. Filtering aborted.') from err

        filters = {
            'q': query_string,
            'l': '',
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            # NOTE(review): the non-empty string "False" is truthy; the
            # responses are decoded as raw text below, which presumably
            # depends on that - confirm before changing it to a bool.
            "raw": "False",
            "sort": "date",
            "radius": job_profile.radius,
            "limit": limit,
            "fromage": lookback_period,
        }

        def run_query():
            """Send ``filters`` to the API and return the decoded response."""
            return json.loads(
                CommonFuncs.convertBytesToString(api.search(**filters)))

        new_jobs = []
        for location in locations:
            filters['l'] = location
            filters['q'] = query_string

            # One request just to learn how many results exist here.
            result_count = int(run_query()['totalResults'])

            for start in range(0, result_count, 25):
                remaining = desired_result_count - len(new_jobs)
                if remaining <= 0:
                    break
                filters['start'] = str(start)
                # Keep only as many jobs as are still needed.
                new_jobs.extend(run_query()['results'][:remaining])

        if not new_jobs:
            return []  # nothing found
        if desired_result_count == 1:  # a single job, not wrapped in a list
            return new_jobs[0]
        # Never longer than desired_result_count; may be shorter when fewer
        # jobs were available.
        return new_jobs[:desired_result_count]
Ejemplo n.º 29
0
class indeed:
    """Thin wrapper around the Indeed search API for skill-based queries.

    After a search that produced results, ``self.jobDataFrame`` holds the
    collected postings de-duplicated by ``jobkey`` and the same table is
    written to ``sample.csv``.
    """

    # Postings requested per call; also the step between ``start`` offsets.
    _PAGE_SIZE = 25
    # Upper bound on the number of postings collected per query.
    _MAX_RESULTS = 100

    def __init__(self):
        self.client = IndeedClient(8836246992678581)

    def skill(self, l, city, jobtype):
        """Search postings that mention ALL skills in ``l``.

        Args:
            l: Iterable of skill strings, joined with `` AND ``.
            city: Location passed to the API.
            jobtype: 'intern', 'internship' or 'Internship' select
                internships; any other value selects full-time jobs.

        Returns:
            List of result dicts (at most 100), empty if nothing matched.
        """
        return self._paged_search(" AND ".join(l), city, jobtype, highlight=1)

    def skillOR(self, l, city, jobtype):
        """Search postings that mention ANY skill in ``l``.

        Same arguments and return value as :meth:`skill`, except that the
        skills are joined with `` OR ``.
        """
        return self._paged_search(" OR ".join(l), city, jobtype)

    def _paged_search(self, query, city, jobtype, **extra):
        """Collect up to ``_MAX_RESULTS`` postings for ``query``, page by page."""
        print(jobtype)
        if jobtype in ['intern', 'internship', 'Internship']:
            jobtype = 'internship'
        else:
            jobtype = 'fulltime'
        params = {
            'q': query,
            'l': city,
            'jt': jobtype,
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            'limit': str(self._PAGE_SIZE),
            # Always present so the paging loop can advance it.
            'start': 0,
        }
        params.update(extra)

        search_response = self.client.search(**params)
        # The first page is part of the result, however few hits there are.
        results = list(search_response['results'])
        if not results:
            return results

        last_offset = min(self._MAX_RESULTS, search_response['totalResults'])
        while params['start'] + self._PAGE_SIZE < last_offset:
            params['start'] += self._PAGE_SIZE
            search_response = self.client.search(**params)
            # Each page is appended exactly once.
            results += search_response['results']
            print(params['start'])

        self.jobDataFrame = pd.DataFrame(results).drop_duplicates('jobkey')
        self.jobDataFrame.to_csv("sample.csv", encoding='UTF-8')
        return results
Ejemplo n.º 30
0
from indeed import IndeedClient
import csv

client = IndeedClient(publisher = 2186395790213512)

# Fetch eight consecutive pages of 25 "marketing" postings and pool the
# results in ``tot``.
tot = []
for page in range(8):
    sr = client.search(**{
        'q' : "marketing",
        'userip' : "1.2.3.4",
        'useragent' : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        'format' : 'json',
        'limit' : 25,
        'start' : page*25
    })
    tot.extend(sr['results'])

# Fields copied from every posting, in output column order.
_JOB_FIELDS = ('jobtitle', 'url', 'city', 'date', 'company', 'snippet',
               'source', 'jobkey')

# One row per posting; each value is encoded to ASCII, silently dropping
# characters that cannot be represented.
allJobs = [
    [posting[field].encode('ascii', 'ignore') for field in _JOB_FIELDS]
    for posting in tot
]
Ejemplo n.º 31
0
class TestSearch:
    """Tests for ``IndeedClient.search`` using a minimal parameter set."""

    def setup(self):
        """Create the client and the baseline search parameters."""
        self.client = IndeedClient("YOUR_PUBLISHER_NUMBER")
        self.params = {
            'q': "python",
            'l': "austin",
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        }

    def teardown(self):
        """Drop the per-test client and parameters."""
        self.client = None
        self.params = None

    @with_setup(setup, teardown)
    def test_search(self):
        """A search with the baseline parameters returns a dict."""
        search_response = self.client.search(**self.params)
        assert type(search_response) is dict

    @with_setup(setup, teardown)
    def test_missing_one_required(self):
        """Removing only the location still returns a dict."""
        del self.params['l']
        search_response = self.client.search(**self.params)
        assert type(search_response) is dict

    @with_setup(setup, teardown)
    @raises(IndeedClientException)
    def test_missing_both_required(self):
        """Removing both query and location raises IndeedClientException."""
        del self.params['q']
        del self.params['l']
        search_response = self.client.search(**self.params)

    @with_setup(setup, teardown)
    @raises(IndeedClientException)
    def test_missing_userip(self):
        """Removing ``userip`` raises IndeedClientException."""
        del self.params['userip']
        search_response = self.client.search(**self.params)

    @with_setup(setup, teardown)
    @raises(IndeedClientException)
    def test_missing_useragent(self):
        """Removing ``useragent`` raises IndeedClientException."""
        del self.params['useragent']
        search_response = self.client.search(**self.params)

    @with_setup(setup, teardown)
    def test_raw_json(self):
        """With ``raw=True`` the response is a string that parses to a dict."""
        self.params['raw'] = True
        search_response = self.client.search(**self.params)
        # `basestring` exists only on Python 2.
        assert isinstance(search_response, basestring)
        assert type(json.loads(search_response)) is dict

    @with_setup(setup, teardown)
    def test_raw_xml_with_paramter(self):
        """With ``format='xml'`` and ``raw=True`` the response is XML text."""
        self.params['format'] = "xml"
        self.params['raw'] = True
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, basestring)
        assert parseString(search_response)

    @with_setup(setup, teardown)
    def test_raw_xml_without_paramter(self):
        """With ``format='xml'`` alone the response is still XML text."""
        self.params['format'] = "xml"
        search_response = self.client.search(**self.params)
        assert isinstance(search_response, basestring)
        assert parseString(search_response)