def scrape_indeed(self, api_key, ip_address, places=None):
    """Search Indeed in every place for every filter term.

    Returns a list of [title, location, url, snippet] entries for jobs
    not previously seen; persists the seen-job keys between runs.
    """
    indeed_client = IndeedClient(api_key)
    matched = []
    seen_jobs = self.load_titles('indeed_jobs')
    if not places:
        places = ['san francisco, ca']
    # Same (place, term) cross product the original comprehension produced.
    for place in places:
        for term in self.filters.keys():
            sys.stderr.write('Searching {} Indeed for {}... '.format(place, term))
            # time.sleep(random.randrange(1, 3))  # throttle requests
            query = {
                'q': term,
                'l': place,
                'userip': ip_address,
                'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
                'limit': 25,
            }
            job_results = indeed_client.search(**query)['results']
            sys.stdout.write('returned {} items\n'.format(len(job_results)))
            for job in job_results:
                key = job['jobkey']
                if key in seen_jobs:
                    continue
                seen_jobs.add(key)
                title = job['jobtitle']
                if self.filter_title(title, self.filters[term]):
                    matched.append([
                        title,
                        job['formattedLocationFull'],
                        job['url'],
                        job['snippet'],
                    ])
    self.save_titles('indeed_jobs', seen_jobs)
    return matched
def get(self):
    """Handle a front-end GET request and write a JSON response.

    Dispatch: with only ``q`` run a search; with only ``jobids`` look the
    jobs up directly; with ``all`` set, search and then fetch full data
    for every returned job key.

    Other APIs to take into consideration:
        USAjobs.gov, key: EwesKi7XhFETegcAroJCod5jeP9wwBkzanA1qatBMRY=
        AuthenticJobs.com, key: de1d14f970eaf280a271b1d5beffafe9
    """
    indeed_key = '4970113146490412'  # not secure, coz repo, but whatevs
    client = IndeedClient(indeed_key)
    # NOTE(review): get_all usually returns a *list* of values — verify,
    # because jobids.split(',') below assumes a string.
    query = self.request.get_all("q")
    location = self.request.get_all("l")
    jobids = self.request.get_all("jobids")
    all = self.request.get_all("all")  # NOTE(review): shadows builtin all()
    output = ""
    if query and not jobids:
        output = self.jobs(client, query, location)
    elif jobids and not query:
        output = client.jobs(tuple(jobids.split(',')))
    elif all != '':
        jobs = self.jobs(client, query, location)
        output = client.jobs(tuple([job.jobkey for job in jobs.results]))
        # for job in jobs get jobkey & use that to get all data then
        # colloide data on job key
    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(json.dumps(output))
def generate_job_list(params, publisher_id):
    """Return list of (title, company, city, date, sponsored, url) tuples
    matching the search criteria, with a header row prepended.

    Pages through the XML API 25 results at a time by advancing
    params['start'] until totalresults is reached. Duplicate records are
    skipped. (Python 2 module: uses unicode().)
    """
    job_list = []
    # since we initiated params['start'] at 0
    total_results = 1
    # Hoisted out of the loop: the client is loop-invariant, so there is no
    # need to rebuild it for every page request.
    client = IndeedClient(publisher=publisher_id)
    while int(params['start']) < total_results:
        search_response = client.search(**params)
        root = ET.fromstring(search_response)
        params['start'] = str(int(params['start']) + 25)
        total_results = int(root.find('totalresults').text)
        for job in root.iter('result'):
            jobtitle = job.find('jobtitle').text
            company = job.find('company').text
            city = job.find('city').text
            #state = job.find('state').text
            #country = job.find('country').text
            date = job.find('date').text
            snippet = job.find('snippet').text
            sponsored = job.find('sponsored').text
            url = job.find('url').text
            # Date slice [5:16] keeps "DD Mon YYYY" from an RFC-style stamp.
            job = (unicode(jobtitle), unicode(company), unicode(city),
                   unicode(date)[5:16].replace(" ", "-"), unicode(sponsored),
                   unicode(url))
            if job not in job_list:
                job_list.append(job)
    # add header
    job_list.insert(0, (unicode("jobtitle"), unicode("company"),
                        unicode("city"), unicode("date"),
                        unicode("sponsored"), unicode("url")))
    return job_list
def connect_indeed(self, config_filepath='indeed_cred.yml'):
    """Attach an IndeedClient to self.c using a hard-coded publisher id.

    Loading the publisher number from config_filepath is currently
    disabled (commented out below). Store in .ssh.
    """
    # yamload = yaml.load(open(config_filepath))
    # credentials = yamload['indeed']
    # pub_num = credentials.get('publisher_num')
    publisher_id = '4353162753214099'
    self.c = IndeedClient(publisher=publisher_id)
    print('connect_indeed done')
def get_indeed_job_list(query, location, radius):
    """Search Indeed for up to 100 jobs and return them as a pandas DataFrame.

    Fetches four pages of 25 results (start offsets 0/25/50/75), exits the
    process if nothing was found, parses the date column, and drops columns
    not used downstream.
    """
    client = IndeedClient(publisher=2863621289879018)
    progress_bar = pyprind.ProgBar(4, title='Searching For Jobs')
    results_pd = pd.DataFrame()
    for numb_results in range(0, 100, 25):
        params = {
            'q': query,
            'radius': radius,
            'l': location,
            'userip': "1.2.3.4",
            'limit': '25',
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            'start': numb_results
        }
        search_response = client.search(**params)
        # Accumulate each page below the previous one.
        results_pd = pd.concat(
            [results_pd, pd.DataFrame.from_dict(search_response['results'])],
            axis=0)
        progress_bar.update()
    if len(results_pd) == 0:
        # Hard exit: this function is used from a CLI workflow.
        sys.exit('Search did not return any jobs')
    results_pd.reset_index(drop=True, inplace=True)
    results_pd['date'] = pd.to_datetime(results_pd.date)
    # Drop response columns that are not needed by callers.
    results_pd.drop([
        'source', 'expired', 'country', 'formattedLocation',
        'formattedLocationFull', 'onmousedown', 'stations', 'state',
        'sponsored'
    ], axis=1, inplace=True)
    return results_pd  # returns the search results as a pandas data frame
def job_search(self, job, location):
    """Search Indeed for `job` in `location` and return the extracted job keys.

    Dumps the raw JSON response to indeed_positions_json.txt as a side
    effect. (Python 2 module: uses print statements.)
    """
    # publisher=5950869068484812
    client = IndeedClient('5950869068484812')
    #params = generate_advanced_query("python", "Boston", 1, 0, 25)
    params = self.generate_advanced_query(job, location, 1, 0, 25)
    search_response = client.search(**params)
    print "Search Response: %s" % search_response
    filename = 'indeed_positions_json.txt'
    self.write_json_to_file(filename, search_response)
    (positions, total) = self.extract_query_result(search_response)
    print total
    jobkeys = []
    for position in positions:
        # extract_position_info appends each position's key into jobkeys.
        self.extract_position_info(position, jobkeys)
    # Debug / experimental code kept for reference:
    #for i in range(len(jobkeys)):
    #print "range (%d: %s)" % (i, jobkeys[i])
    #print '*' * 100
    #job_response = client.jobs(jobkeys = "ad752ce9ae3f1b5e")
    #print job_response['results']
    #print job_response
    #filename = 'indeed_positions_json.txt'
    #self.write_json_to_file(filename, job_response)
    return jobkeys
def setup(self):
    """Create a fresh client and default search parameters before each test."""
    self.client = IndeedClient("YOUR_PUBLISHER_NUMBER")
    self.params = dict(
        q="python",
        l="austin",
        userip="1.2.3.4",
        useragent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
    )
def setup(self):
    """Build the client, default search parameters, and test utilities."""
    self.client = IndeedClient('8251007850639120')
    self.params = dict(
        q="python",
        l="austin",
        userip="1.2.3.4",
        useragent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
    )
    self.utils = Utils()
class TestJobs():
    """Nose tests for IndeedClient.jobs (Python 2; uses nose with_setup/raises)."""

    def setup(self):
        # Fresh helper, client, and two known job keys before every test.
        self.utils = Utils()
        self.client = IndeedClient('8251007850639120')
        self.params = {
            'jobkeys' : ("7c398c74a8f22c72", "d7802e9ce3b4af7d"),
        }

    def teardown(self):
        self.client = None
        self.params = None

    @with_setup(setup, teardown)
    def test_jobs(self):
        # Default (JSON) format parses to a dict.
        jobs_response = self.client.jobs(**self.params)
        assert type(jobs_response) is dict
        print jobs_response
        self.utils.output_to_file('output2', jobs_response)
        self.utils.open_with_subl('output2')
        self.utils.find_all_jobs_that_contains_job_parameter()
        # self.utils.output_to_file('sample.json', str(j))
        # self.utils.open_with_subl('sample.json')

    @with_setup(setup, teardown)
    @raises(IndeedClientException)
    def test_missing_jobkeys(self):
        # 'jobkeys' is mandatory; removing it must raise.
        del self.params['jobkeys']
        jobs_response = self.client.jobs(**self.params)

    @with_setup(setup, teardown)
    def test_raw_json(self):
        # raw=True returns the unparsed JSON string.
        self.params['raw'] = True
        jobs_response = self.client.jobs(**self.params)
        assert isinstance(jobs_response, basestring)
        assert type(json.loads(jobs_response)) is dict

    @with_setup(setup, teardown)
    def test_raw_xml_with_paramter(self):
        # Raw XML string must parse with minidom.
        self.params['format'] = "xml"
        self.params['raw'] = True
        jobs_response = self.client.jobs(**self.params)
        assert isinstance(jobs_response, basestring)
        assert parseString(jobs_response)

    @with_setup(setup, teardown)
    def test_raw_xml_without_paramter(self):
        # XML format yields a raw string even without raw=True.
        self.params['format'] = "xml"
        jobs_response = self.client.jobs(**self.params)
        assert isinstance(jobs_response, basestring)
        assert parseString(jobs_response)

    '''New test cases not included in GIT'''
    # @with_setup(setup, teardown)
    # def test_invalid_jobkey
def indeed_urls(parameters, publisher_key=None):
    """Use Indeed publisher ID to retrieve URLs from the Indeed API.

    Falls back to the API_KEY environment variable when no key is given;
    raises NameError when the response carries no 'results'.
    """
    key = os.environ['API_KEY'] if publisher_key is None else publisher_key
    response = IndeedClient(key).search(**parameters)
    try:
        return [str(entry['url']) for entry in response['results']]
    except KeyError:
        raise NameError('Invalid Publisher ID')
def indeed_urls(parameters, publisher_key=None):
    """Use Indeed publisher ID to retrieve URLs from the Indeed API.

    Positional argument:
    parameters -- dict of search parameters passed straight to the API
    Keyword argument:
    publisher_key -- API publisher key; defaults to the API_KEY env var

    Raises NameError if the response has no 'results' key (taken here to
    mean the publisher id was rejected).
    """
    if publisher_key is None:
        publisher_key = os.environ["API_KEY"]
    client = IndeedClient(publisher_key)
    response = client.search(**parameters)
    try:
        urls = [str(links["url"]) for links in response["results"]]
        return urls
    except KeyError:
        raise NameError("Invalid Publisher ID")
def Search(query, location, limit=10, start=0):
    """Run one Indeed job search and return the raw API response dict."""
    request = {
        'q': query,
        'l': location,
        'limit': limit,
        'start': start,
        'userip': "1.2.3.4",
        'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
    }
    return IndeedClient(publisher=PUBLISHER_ID).search(**request)
def get_data():
    """Collect Indeed postings for every (city, job-title) combination.

    Returns a list whose first element is the literal string 'jobs'
    (preserved for existing consumers) followed by one dict per
    non-expired search result.

    Fix: the original performed an initial hard-coded search for
    "front end engineer"/"austin" whose response was never used — a
    wasted API call, now removed.
    """
    client = IndeedClient('7381316591612982')
    cities = [
        'New York, NY', 'Austin, TX', 'San Francisco, CA', 'Boston, MA',
        'Chicago, IL', 'Miami, FL'
    ]
    jobs = [
        'Front End Engineer', 'Back End Engineer', 'Data Science',
        'Product Management', 'Director of Engineering', 'Data Engineer',
        'Data Analyst', 'Accounting', 'Marketing', 'Finance', 'Nurse',
        'Doctor', 'Lawyer', 'Paralegal', 'sales', 'customer_service',
        'human resources', 'executive assistant', 'operations', 'teacher',
        'maintenance', 'security guards'
    ]
    res_list = ['jobs']
    for c in cities:
        for j in jobs:
            params = {
                'q': j,
                'l': c,
                'userip': "172.68.141.95",
                'useragent': """Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36""",
                'limit': 25
            }
            search_response = client.search(**params)
            for res in search_response['results']:
                # Skip postings the API flags as expired.
                if res['expired']:
                    continue
                res_list.append({
                    'city': res['city'],
                    'date_posted': res['date'],
                    'company': res['company'],
                    'title': res['jobtitle'],
                    'url': res['url'],
                    'job_id': res['jobkey'],
                    'state': res['state'],
                    'snippet': res['snippet'],
                })
    return res_list
def access_indeed_api(parameters, publisher_key=None):
    """Access the Indeed API using the given parameters and publisher key.

    Positional argument:
    parameters -- a dictionary of the parameters to send to Indeed's API
    Keyword argument:
    publisher_key -- the publisher key for Indeed's API, defaults to the
    API_KEY environment variable
    """
    key = publisher_key if publisher_key is not None else os.environ['API_KEY']
    return IndeedClient(key).search(**parameters)
def search_with_api(self, params: dict):
    """Page through Indeed search results and persist each job to the DB.

    Advances the 'start' offset page by page, parses each response, and
    inserts rows into Job, silently skipping duplicates (UNIQUE errors).

    Fix: the loop count now uses ceiling division — the original
    int(total / page_size) dropped the final partial page (e.g. 30 hits
    with page size 25 fetched only the first 25).
    """
    client = IndeedClient(publisher=self.user_config.INDEED_API_KEY)
    search_response = client.search(**params)
    total_number_hits = search_response['totalResults']
    page_size = IndeedConstants.API.MAX_NUM_RESULTS_PER_REQUEST
    # Ceiling division so a trailing partial page is still fetched.
    num_loops = (total_number_hits + page_size - 1) // page_size
    counter_start = 0
    print('Total number of hits: {0}'.format(total_number_hits))
    count_jobs_added = 0
    for i in range(0, num_loops):
        # We get around the per-request cap by advancing the start offset.
        params['start'] = counter_start
        search_response = client.search(**params)
        list_jobs = IndeedParser.get_jobs_from_response(search_response)
        for job in list_jobs:
            try:
                Job.create(key=job.key, website=job.website, link=job.link,
                           title=job.title, company=job.company,
                           city=job.city, state=job.state,
                           country=job.country, location=job.location,
                           posted_date=job.posted_date, expired=job.expired,
                           easy_apply=job.easy_apply)
                count_jobs_added += 1
            except peewee.IntegrityError as e:
                # UNIQUE violation means the job is already stored — ignore;
                # surface any other integrity failure.
                if 'UNIQUE' in str(e):
                    pass
                else:
                    print(str(e))
        # Increment start
        counter_start += page_size
    print('Added {0} new jobs'.format(count_jobs_added))
def get_job_description(input_skills):
    """Search Indeed for input_skills and return the cleaned words gathered
    from every posting URL in the first 25 results."""
    client = IndeedClient('7863709885041358')
    query = {
        'q': input_skills,
        'userip': "1.2.3.4",
        'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        'limit': 25,
    }
    response = client.search(**query)
    posting_urls = [posting['url'] for posting in response['results']]
    words = []
    for posting_url in posting_urls:
        words.extend(text_cleaner(posting_url))
    return words
class TestJobs:
    """Nose tests for IndeedClient.jobs (Python 2; nose with_setup/raises)."""

    def setup(self):
        # Fresh client and two sample job keys before every test.
        self.client = IndeedClient("YOUR_PUBLISHER_NUMBER")
        self.params = {
            'jobkeys' : ("5898e9d8f5c0593f", "c2c41f024581eae5"),
        }

    def teardown(self):
        self.client = None
        self.params = None

    @with_setup(setup, teardown)
    def test_jobs(self):
        # Default (JSON) format parses to a dict.
        jobs_response = self.client.jobs(**self.params)
        assert type(jobs_response) is dict

    @with_setup(setup, teardown)
    @raises(IndeedClientException)
    def test_missing_jobkeys(self):
        # 'jobkeys' is mandatory; removing it must raise.
        del self.params['jobkeys']
        jobs_response = self.client.jobs(**self.params)

    @with_setup(setup, teardown)
    def test_raw_json(self):
        # raw=True returns the unparsed JSON string.
        self.params['raw'] = True
        jobs_response = self.client.jobs(**self.params)
        assert isinstance(jobs_response, basestring)
        assert type(json.loads(jobs_response)) is dict

    @with_setup(setup, teardown)
    def test_raw_xml_with_paramter(self):
        # Raw XML string must parse with minidom.
        self.params['format'] = "xml"
        self.params['raw'] = True
        jobs_response = self.client.jobs(**self.params)
        assert isinstance(jobs_response, basestring)
        assert parseString(jobs_response)

    @with_setup(setup, teardown)
    def test_raw_xml_without_paramter(self):
        # XML format yields a raw string even without raw=True.
        self.params['format'] = "xml"
        jobs_response = self.client.jobs(**self.params)
        assert isinstance(jobs_response, basestring)
        assert parseString(jobs_response)
class TestJobs:
    """Jobs-API test suite (Python 2 nose; duplicate of an earlier suite)."""

    def setup(self):
        # New client and fixed job keys for each test case.
        self.client = IndeedClient("YOUR_PUBLISHER_NUMBER")
        self.params = {
            'jobkeys': ("5898e9d8f5c0593f", "c2c41f024581eae5"),
        }

    def teardown(self):
        self.client = None
        self.params = None

    @with_setup(setup, teardown)
    def test_jobs(self):
        # JSON (default) responses are parsed dicts.
        jobs_response = self.client.jobs(**self.params)
        assert type(jobs_response) is dict

    @with_setup(setup, teardown)
    @raises(IndeedClientException)
    def test_missing_jobkeys(self):
        # Required parameter removed -> client must raise.
        del self.params['jobkeys']
        jobs_response = self.client.jobs(**self.params)

    @with_setup(setup, teardown)
    def test_raw_json(self):
        # raw=True disables parsing; response stays a string.
        self.params['raw'] = True
        jobs_response = self.client.jobs(**self.params)
        assert isinstance(jobs_response, basestring)
        assert type(json.loads(jobs_response)) is dict

    @with_setup(setup, teardown)
    def test_raw_xml_with_paramter(self):
        # XML + raw: string response parseable by minidom.
        self.params['format'] = "xml"
        self.params['raw'] = True
        jobs_response = self.client.jobs(**self.params)
        assert isinstance(jobs_response, basestring)
        assert parseString(jobs_response)

    @with_setup(setup, teardown)
    def test_raw_xml_without_paramter(self):
        # XML format alone also yields a raw string.
        self.params['format'] = "xml"
        jobs_response = self.client.jobs(**self.params)
        assert isinstance(jobs_response, basestring)
        assert parseString(jobs_response)
def main():
    """Search Indeed for "python" in Boston and print each job key (Python 2)."""
    # publisher=5950869068484812
    client = IndeedClient('5950869068484812')
    params = generate_advanced_query("python", "Boston", 10, 0, 25)
    search_response = client.search(**params)
    #print search_response
    #filename = 'indeed_positions_json.txt'
    # write_json_to_file(filename, search_response)
    (positions, total) = extract_query_result(search_response)
    print total
    jobkeys = []
    for position in positions:
        # extract_position_info appends each position's key into jobkeys.
        extract_position_info(position, jobkeys)
    for i in range(len(jobkeys)):
        print jobkeys[i]
def fetch_indeed_data(counties, search):
    """Run one Indeed search per county and return the raw responses.

    `search` is the query string; each county is substituted as the
    location while the other parameters stay fixed.
    """
    from indeed import IndeedClient
    client = IndeedClient('6437444271691851')
    params = {
        'q': search,
        'l': "bergen county, nj",  # placeholder; overwritten per county below
        'userip': "1.2.3.4",
        'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        'latlong': 1,
        'radius': 10,
        'fromage': 7,
        'limit': 25,
    }
    results = []
    for county in counties:
        params['l'] = county
        results.append(client.search(**params))
    return results
def main():
    """Search all configured locations, scrape each posting, and save a word
    frequency map of the job descriptions (Python 2 script entry point)."""
    client = IndeedClient(PUB_ID)
    search_params = build_params(locations, JOB_QUERY)
    search_results = []
    count = 1
    # One API request per location, with a simple progress line on stdout.
    for params in search_params:
        stdout.flush()
        stdout.write("\rHtml request: {}/{}".format(count, len(locations)))
        search_response = client.search(**params)
        search_results.append(search_response)
        count += 1
    # Stop-words and single letters excluded from the frequency count.
    word_filter = ['and', 'to', 'the', 'of', 'a', 'in', 'with', 'you', 'on',
                   'that', 'are', 'will', 'is', 'your', 'for', 'we', 'from',
                   'an', 'be', 'have', 'or', 'just', 'can', 'also', 'how',
                   'at', 'as', 'do', 'other', 'should', 'what', 'us', 'this',
                   'it', 'if', 'get', '-', '&', 'a', 'b', 'c', 'd', 'e', 'f',
                   'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
                   's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    count = 1
    number_of_locations = len(search_results)
    word_map = Counter()
    for search in search_results:
        print "Currently on {}/{}".format(count, number_of_locations)
        if len(search['results']) == 0:
            print "Nothing found for: {}".format(search['location'])
        else:
            print "Attempting {}...".format(search['location'])
            # Fetch every posting page and tally its description words.
            for job in search['results']:
                url = job['url']
                html = requests.get(url)
                word_list = pull_job_description(html.content)
                for word in word_list:
                    if word.lower() not in word_filter:
                        word_map[word.lower()] += 1
        count += 1
    save_to_file(OUTPUT_FILE, word_map)
def scrape_indeed(self, api_key, ip_address, places=None):
    """Search Indeed in each place for every filter term and collect new jobs.

    Returns a list of [title, location, url, snippet] entries for jobs whose
    keys have not been seen before; the seen-key set is persisted via
    save_titles/load_titles.
    """
    indeed_client = IndeedClient(api_key)
    indeed_matched_jobs = []
    seen_jobs = self.load_titles('indeed_jobs')
    if not places:
        places = ['san francisco, ca']
    # Cross product of every place with every filter term.
    for place, term in [(place, term) for place in places
                        for term in self.filters.keys()]:
        sys.stderr.write('Searching {} Indeed for {}... '.format(
            place, term))
        # time.sleep(random.randrange(1, 3))  # throttle requests
        params = {
            'q': term,
            'l': place,
            'userip': ip_address,
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            'limit': 25
        }
        search_response = indeed_client.search(**params)
        job_results = search_response['results']
        sys.stdout.write('returned {} items\n'.format(len(job_results)))
        for job in job_results:
            job_id = job['jobkey']
            # Only consider jobs we have not recorded on a previous run.
            if job_id not in seen_jobs:
                seen_jobs.add(job_id)
                job_title = job['jobtitle']
                if self.filter_title(job_title, self.filters[term]):
                    indeed_matched_jobs.append([
                        job_title, job['formattedLocationFull'], job['url'],
                        job['snippet']
                    ])
    self.save_titles('indeed_jobs', seen_jobs)
    return indeed_matched_jobs
class Threadr(object):
    """Fetch Indeed search results for one keyword/location pair and upload
    the raw response to an S3 bucket."""

    def __init__(self, keyword, location):
        self.conn = boto.connect_s3()  # Connecting to S3
        self.bucket = self.conn.get_bucket(
            'bucketofindeeds')  # Accessing the correct bucket
        self.json_up = Key(self.bucket)  # Make sure to name it.
        self.content_up = Key(self.bucket)  # Make sure to name it.
        self.keyword = keyword
        self.location = location
        print('init done')

    def connect_indeed(self, config_filepath='indeed_cred.yml'):
        # Store in .ssh
        # Credential loading from YAML is currently disabled:
        # yamload = yaml.load(open(config_filepath))
        # credentials = yamload['indeed']
        # pub_num = credentials.get('publisher_num')
        self.c = IndeedClient(publisher='4353162753214099')
        print('connect_indeed done')

    def parameters(self, keyword, location):
        # Make sure to try using multiple keywords
        # Random desktop user agent plus the machine's public IP.
        ua = UserAgent(fallback='Your favorite Browser')
        self.params = {
            'q': str(keyword),
            'l': str(location),
            'userip': requests.get("http://icanhazip.com").text,
            'useragent': ua.random
        }
        print('parameters done')

    def job_search(self):
        # This will return a json file.
        self.response = self.c.search(**self.params)
        print(len(self.response['results']), 'jobs returned.')

    def send_json(self):
        # Upload the stringified response under a fixed test key.
        self.json_up.key = 'indeed_jsons/test'
        self.json_up.set_contents_from_string(str(self.response) + '\n')
        print('Its Working.')

    def mine_that(self):
        # Full pipeline: connect -> build params -> search -> upload.
        self.connect_indeed()
        self.parameters(self.keyword, self.location)
        self.job_search()
        self.send_json()
# Search Indeed Switzerland for internships near Zurich and dump a dated
# text report of relative time, title, company, and URL per posting.
from indeed import IndeedClient
import time

client = IndeedClient(publisher='')
params = {
    'q': "internship",
    'l': "Zurich",
    'userip': "1.2.3.4",
    'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
    'radius': 50,
    'limit': 100,
    'co': 'ch',
    'sort': 'date'
}

search_response = client.search(**params)

# File name carries the acquisition date (YYYYMD).
filename = 'jobs_' + str(time.localtime()[0]) + str(time.localtime()[1]) + str(
    time.localtime()[2]) + '.txt'

with open(r'export path' + filename, 'w') as textfile:
    textfile.write('acquisition time: ' + str(time.localtime()[3]) + ':' +
                   str(time.localtime()[4]) + '\n\n')
    # FIX: iterate over the result list, not the top-level response dict —
    # len(search_response) counted the response's keys, not the jobs.
    for i in range(0, len(search_response['results'])):
        reltime = search_response['results'][i]['formattedRelativeTime']
        jobtitle = search_response['results'][i]['jobtitle']
        company = search_response['results'][i]['company']
        url = search_response['results'][i]['url']
        textfile.write(reltime + '\t' + jobtitle + '\t company: ' + company +
                       '\n' + url + '\n\n')
# Author: Jichao Sun ([email protected]) # Date: April 26, 2016 # Setup: pip install indeed # pip install requests --upgrade from bs4 import BeautifulSoup from indeed import IndeedClient #import threading, urllib2 import urllib, urllib2, re jichaoID = 278720823964828 client = IndeedClient(publisher=jichaoID) # If salary is non empty, then the ordering of jobs per query is preserved. # Thus can use difference between two queries to find jobs in salary range. # Jobs with no specified salaries are estimated def getRawJobs(what, where, count, jobType, radius, salary): if jobType not in [ "fulltime", "parttime", "contract", "internship", "temporary", "" ]: return [] results = [] params = { 'q': what + "+$" + salary, # Job keywords 'l': where, # Location as a string, 'jt': jobType, # Type of job, fulltime parttime contract etc...
#importin Indeed Python API module from indeed import IndeedClient client = IndeedClient(publisher=12254335) # we'll do this later parameters = { 'q': "python developer", 'l': "London, GB", 'sort': "date", 'fromage': "5", 'limit': "25", 'filter': "1", 'userip': "192.186.176.550:60409", 'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)" } # our main search function def get_offers(params): # perform search search_results = client.search(**params) # we want this to be a dictionary # loop through each offer element for elm in search_results['results']: offer = (elm['jobtitle'], elm['formattedLocation'], elm['snippet'], elm['url'], elm['indeedApply'], elm['jobkey'], elm['date'])
def search(params):
    """Forward `params` to the Indeed search API and return its response."""
    return IndeedClient(publisher=8201417039877332).search(**params)
# verifying indeed publisher number from indeed import IndeedClient client = IndeedClient(publisher = 'publisher_number') params = { 'q' : "software engineer", 'l' : "Chicago", 'sort' : "date", 'fromage' : "5", 'limit' : "50", 'filter' : "1", 'userip' : "ip_address", 'useragent' : "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36", } # main search function def get_offers(params): search_results = client.search(**params) #perform search for elm in search_results['results']: offer = (elm['jobtitle'], #parsing the offer elm['formattedLocation'], elm['formattedLocation'], elm['snippet'], elm['url'], elm['indeedApply'], elm['jobkey'], elm['date'])
User must provide a valid indeed.com PUBLISHER_NUMBER and one or more job keys. See this page to create an account (it was free and quick for me): http://www.indeed.com/publisher/ Get each job key by running the search_indeed_api.py program. ''' from indeed import IndeedClient import time site = 'indeed.com' client = IndeedClient('PUBLISHER_NUMBER') # Job keys copied from the response to the search program: jk_l = ["e8930d8d162c4b70", "6bb8f41ea97bd6f8"] job_response = client.jobs(jobkeys = (jk_l )) # Example job response, Mon2016_0418. # Entire response is a dictionary. # The key 'results' has value of a list of dicts, eg: # {u'version': 2, u'results': [{u'formattedRelativeTime': u'30+ days ago', ... # ''' Each dict in the list is one job record, as shown here: {u'formattedRelativeTime': u'30+ days ago', u'city': u'San Francisco',
def __init__(self):
    """Create the Indeed API client (DataFrame cache init is disabled)."""
    # self.jobDataFrame = pd.DataFrame()
    self.client = IndeedClient(8836246992678581)
# Fetch a single Indeed job-view page for a known jobkey.
from indeed import IndeedClient

client = IndeedClient('1439892112925001')
params = {
    'q': "python",
    'l': "boston",
    'userip': "1.2.3.4",
    'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)"
}

url = 'http://www.indeed.com/viewjob?jk=89b6ad7a31f7c4be&qd=Edw9zLy29tPtf_aglDLrzkea4GYpkSu9Dn9RxMjGtc-Au7bNkAhEpP8509-8oVyQct6gb9Hh9FwGl317FwNQL73cXKONUJYtCg03YtTr2S0&indpubnum=1439892112925001&atk=1b94foutl5sn398g'

# FIX: was "import requets" (typo), which raised ImportError while the code
# below uses the real requests package.
import requests
requests.get(url)

jobkey = '89b6ad7a31f7c4be'
counties = [cs[0] for cs in counties_states] states = [cs[1] for cs in counties_states] counties_by_state = {} for (value, key) in counties_states: counties_by_state.setdefault(key, []) # key might exist already counties_by_state[key].append(value) counties_states_2 = [ str(c) + ', ' + str(state_dict_2[s]) for c, s in zip(counties, states) ] #### GET DATA FROM INDEED #### from indeed import IndeedClient client = IndeedClient(publisher=8924341972846274) query = 'data scientist' # Only search in our domain # Indeed search is supposed to be ANDed but results prove the contrary f = open('/data/w205/W205_final_storage/indeed/txt/indeed.txt', 'w') for county_state in counties_states_2: county = county_state.split(', ')[0] state = county_state.split(', ')[1] jobkeys = [] # To avoid duplicates (in a county) params = { 'q': query, 'l': county_state, 'userip': "1.2.3.4", 'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
class indeed:
    """Pages Indeed search results into a pandas DataFrame.

    skill() AND-joins the skill list, skillOR() OR-joins it; both page up
    to ~100 results, de-duplicate on jobkey, and write sample.csv.
    """
    #jobDataFrame

    def __init__(self):
        # self.jobDataFrame= pd.DataFrame();
        self.client = IndeedClient(8836246992678581)

    def skill(self, l, city, jobtype):
        """Search for all skills in `l` (AND-joined) in `city`.

        jobtype values containing 'intern' map to 'internship'; anything
        else searches full-time roles. Returns the raw result dicts.
        """
        print(jobtype)
        if jobtype in ['intern', 'internship', 'Internship']:
            jobtype = 'internship'
        else:
            jobtype = 'fulltime'
        params = {
            'q': " AND ".join(l),
            'l': city,
            'jt': jobtype,
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            'limit': "25",
            'start': 0,
            'highlight': 1
        }
        i = 25
        search_response = self.client.search(**params)
        results = []
        if (len(search_response['results']) <= 0):
            return results
        while (i < 100 and i < search_response['totalResults']):
            results += search_response['results']
            params['start'] += 25
            search_response = self.client.search(**params)
            results += search_response['results']
            i += 25
            print(params['start'])
        self.jobDataFrame = pd.DataFrame(results).drop_duplicates('jobkey')
        self.jobDataFrame.to_csv("sample.csv", encoding='UTF-8')
        return results

    def skillOR(self, l, city, jobtype):
        """Search for any skill in `l` (OR-joined) in `city`; mirrors skill()."""
        print(jobtype)
        if jobtype in ['intern', 'internship', 'Internship']:
            jobtype = 'internship'
        else:
            jobtype = 'fulltime'
        params = {
            'q': " OR ".join(l),
            'l': city,
            'jt': jobtype,
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            'limit': "50",
            # FIX: 'start' was missing, so params['start'] += 25 in the
            # paging loop below raised KeyError whenever totalResults > 25.
            'start': 0
        }
        i = 25
        search_response = self.client.search(**params)
        results = []
        if (len(search_response['results']) <= 0):
            return results
        while (i < 100 and i < search_response['totalResults']):
            results += search_response['results']
            params['start'] += 25
            search_response = self.client.search(**params)
            results += search_response['results']
            i += 25
            print(params['start'])
        self.jobDataFrame = pd.DataFrame(results).drop_duplicates('jobkey')
        self.jobDataFrame.to_csv("sample.csv", encoding='UTF-8')
        return results
def setup(self):
    """Prepare the client plus the jobkeys parameter used by the jobs tests."""
    self.client = IndeedClient("YOUR_PUBLISHER_NUMBER")
    self.params = dict(jobkeys=("5898e9d8f5c0593f", "c2c41f024581eae5"))
# Pull 8 pages (25 jobs each) of "marketing" postings from Indeed and
# flatten selected fields into ascii-encoded rows.
from indeed import IndeedClient
import csv

client = IndeedClient(publisher=2186395790213512)

tot = []
for page in range(0, 8):
    params = {
        'q': "marketing",
        'userip': "1.2.3.4",
        'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        'format': 'json',
        'limit': 25,
        'start': page * 25,
    }
    sr = client.search(**params)
    for result in sr['results']:
        tot.append(result)

# One row per job, fields in a fixed order, non-ascii characters dropped.
allJobs = []
for record in tot:
    row = []
    for field in ('jobtitle', 'url', 'city', 'date',
                  'company', 'snippet', 'source', 'jobkey'):
        row.append(record[field].encode('ascii', 'ignore'))
    allJobs.append(row)
return results # ======================== Q U E R Y P A R A M E T E R S ================================== params['fromage']='any' params['radius'] = 0 params['q'] = "" params['highlight'] = 0 params['jobtitle'] #Alteryx OR Cassandra OR Clojure OR Cloudera OR D3 OR Elasticsearch# OR GraphLab OR Dato OR Hadoop OR PureData OR SPSS OR Julia OR Kafka OR Looker OR Medidata OR MongoDB OR Neo4j OR NLTK OR NumPy OR Orange OR pandas OR Pentaho OR Pig OR PostGIS OR PostgreSQL OR Python OR R OR edis OR Redshift OR BusinessObjects OR SAS OR Scala OR scikit-learn OR SciPy OR Spark OR SPSS OR SQL OR Stata OR Storm OR Tableau OR Vertica OR Vowpal" compincr = 25 complevels = 11 ## ================================== A P Is ============================================== indeedapi = IndeedClient(publisher='7423517030312598') mongoclient = MongoClient() db=mongoclient.fluxx def alchemy(subjectURL,endpoint,subdict): # ENDPOINTS: # URLGetRankedNamedEntities ---- status, usage, url, language, entities, ['entities'] in results iterable # URLGetPubDate --- ['publicationDate']['date'] in results single value # URLGetRankedTaxonomy -- ['taxonomy'] in results iterable # URLGetRankedConcepts -- ['concepts'] in results iterable # URLGetRelations -- ['relations'] in results iterable --- sentiment and entities = 1 (does it apply to the calls above?) # URLGetText -- ['text'] in results single value key= '86bc3f87cb329d5be230ebb58d3b6c05f52e2417' query = urllib.quote(subjectURL) user_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; FDM; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 1.1.4322)' url = "http://access.alchemyapi.com/calls/url/" + endpoint + "?url=" + subjectURL + '&apikey=' + key + '&outputMode=json'
class TestSearch: def setup(self): self.client = IndeedClient('8251007850639120') self.params = { 'q': "python", 'l': "austin", 'userip': "1.2.3.4", 'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)", } self.utils = Utils() def teardown(self): self.client = None self.params = None @with_setup(setup, teardown) def test_search(self): search_response = self.client.search(**self.params) assert type(search_response) is dict self.utils.output_to_file('sample', search_response) @with_setup(setup, teardown) def test_missing_one_required(self): del self.params['l'] search_response = self.client.search(**self.params) assert type(search_response) is dict @with_setup(setup, teardown) @raises(IndeedClientException) def test_missing_both_required(self): del self.params['q'] del self.params['l'] search_esponse = self.client.search(**self.params) @with_setup(setup, teardown) @raises(IndeedClientException) def test_missing_userip(self): del self.params['userip'] search_response = self.client.search(**self.params) @with_setup(setup, teardown) @raises(IndeedClientException) def test_missing_useragent(self): del self.params['useragent'] search_response = self.client.search(**self.params) @with_setup(setup, teardown) def test_raw_json(self): self.params['raw'] = True search_response = self.client.search(**self.params) assert isinstance(search_response, basestring) assert type(json.loads(search_response)) is dict @with_setup(setup, teardown) def test_raw_xml_with_paramter(self): self.params['format'] = "xml" self.params['raw'] = True search_response = self.client.search(**self.params) assert isinstance(search_response, basestring) assert parseString(search_response) @with_setup(setup, teardown) def test_raw_xml_without_paramter(self): self.params['format'] = "xml" search_response = self.client.search(**self.params) assert isinstance(search_response, basestring) assert parseString(search_response) ''' Few Tests written by me ''' @with_setup(setup, teardown) def 
test_search_extra(self): search_response = self.client.search(**self.params) assert type(search_response) is dict assert len( self.utils.find_all_jobs_not_contains_job_parameter( search_response, 'city', 'austin')) == 0 assert len(self.utils.find_all_jobs_not_contains_job_parameter(search_response, 'country', 'US'))\ == 0 assert len(self.utils.find_all_jobs_not_contains_job_parameter(search_response, 'language', 'en')) \ == 0 assert self.utils.get_num_jobs(search_response) == 10 @with_setup(setup, teardown) def test_sort(self): self.params['sort'] = "date" search_response = self.client.search(**self.params) assert type(search_response) is dict @with_setup(setup, teardown) def test_start(self): self.params['start'] = "2" search_response = self.client.search(**self.params) assert type(search_response) is dict @with_setup(setup, teardown) def test_limit(self): self.params['limit'] = "25" search_response = self.client.search(**self.params) assert type(search_response) is dict assert self.utils.get_num_jobs(search_response) == 25 @with_setup(setup, teardown) def test_fromage(self): self.params['fromage'] = "2" search_response = self.client.search(**self.params) assert type(search_response) is dict @with_setup(setup, teardown) def test_limit(self): self.params['limit'] = "25" search_response = self.client.search(**self.params) assert type(search_response) is dict assert self.utils.get_num_jobs(search_response) == 25 @with_setup(setup, teardown) def test_highlight(self): self.params['highlight'] = "1" search_response = self.client.search(**self.params) assert type(search_response) is dict @with_setup(setup, teardown) def test_duplicate(self): self.params['duplicate'] = "1" search_response = self.client.search(**self.params) assert type(search_response) is dict @with_setup(setup, teardown) def test_co(self): self.params['co'] = "ca" self.params['l'] = "toronto" search_response = self.client.search(**self.params) assert type(search_response) is dict @with_setup(setup, 
teardown) def test_invalid_limit(self): self.params['limit'] = '-100' search_response = self.client.search(**self.params) assert self.utils.get_num_jobs(search_response) == 0 # trying a bunch of invalid parameters, I noticed that no error is thrown. Instead it seems to ignore. It this correct? # ie. negative fromage, string instead of ints and vs versa @with_setup(setup, teardown) def test_several_params(self): self.params['co'] = "ca" self.params['l'] = "toronto" self.params['duplicate'] = "1" self.params['highlight'] = "1" self.params['limit'] = "25" self.params['fromage'] = "10" self.params['start'] = "2" search_response = self.client.search(**self.params) assert type(search_response) is dict assert self.utils.get_num_jobs(search_response) == 25
from indeed import IndeedClient
import pymysql
from database import addToDatabase

# NOTE(review): the publisher ID was redacted to "***************" in the
# original, which is not valid Python syntax. A placeholder string is used
# instead; supply a real Indeed publisher ID before running.
client = IndeedClient(publisher="YOUR_PUBLISHER_ID")

# Default Indeed search parameters: latest python-developer postings in India.
# NOTE(review): '192.186.176.550:60409' is not a valid IPv4 address (octet 550,
# and the API expects no port) -- confirm the intended end-user IP.
parameters = {'q': "python developer",
              'l': "India",
              'sort': "date",
              'fromage': "5",
              'limit': "25",
              'filter': "1",
              'userip': "192.186.176.550:60409",
              'useragent': "Mozilla/5.0"}


def get_offers(params):
    """Run an Indeed search with *params* and persist every returned offer.

    BUG FIX: the original body called ``client.search(**search_params)`` where
    ``search_params`` was never defined (NameError at first call); it now uses
    the ``params`` argument it was given.
    """
    search_results = client.search(**params)
    for elm in search_results['results']:
        # Tuple layout matches what addToDatabase() expects.
        offer = (elm['jobtitle'],
                 elm['formattedLocation'],
                 elm['snippet'],
                 elm['url'],
                 elm['indeedApply'],
                 elm['jobkey'],
                 elm['date'])
        addToDatabase(offer)


def searchAllCities():
    # NOTE(review): this function is truncated in the visible source; only the
    # initial counter survives here.
    current_city = 0
def setup(self):
    """Prepare each test: build the API client and the default job-key params."""
    publisher_id = "YOUR_PUBLISHER_NUMBER"
    default_job_keys = ("5898e9d8f5c0593f", "c2c41f024581eae5")
    self.client = IndeedClient(publisher_id)
    self.params = {'jobkeys': default_job_keys}
#This is for the mail client, by which we will be able to get user base updates app.config.update( DEBUG=True, #Email settings MAIL_SERVER='smtp.gmail.com', MAIL_PORT=465, MAIL_USE_SSL=True, MAIL_USERNAME = credentials.my_email_username, MAIL_PASSWORD = credentials.my_email_password ) mail=Mail(app) #Creating the clients to interact with the APIs twilio_api = TwilioRestClient(credentials.my_twilio_account_sid, credentials.my_twilio_auth_token) #Twilio indeed_api = IndeedClient(publisher = credentials.my_indeed_publisher_id) #Indeed #Client to shorten links with TinyURL shortener = Shortener('Tinyurl', timeout=86400) #Function to find and deliver jobs for each user in the jQuery file. This function is called daily as well as whenever the "admin" user sends a text to the endpoint with the word 'override' def FindAndDeliverJobs(): #Opening up the json file with all the users for reading with open('user_info.json', "r") as load_file: user_list = json.load(load_file) #Loop to iterate through every user inside the json file for user in user_list: #Only look up jobs for the user if they have confirmed their number if user['confirmed'] == 1: #Initializing the parameters for the Indeed search using the users preferences
def indeedAPI2(defTask): params = {} params['userip'] = "1.2.3.4", params['useragent'] = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0)" params['start'] = 1 params['latlong'] = 1 params['as_ttl'] = '' params['limit'] = 25 params['fromage']='any' params['radius'] = 0 params['q'] = '' params['highlight'] = 0 params['jobtitle'] = '' compincr = 25 complevels = 11 indeedapi = IndeedClient(publisher='7423517030312598') print params print 'START:',str(time.asctime(time.localtime())) newJobs = 0 expiredJobs = 0 # states=["AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"] states = ['Climate and Land Use Alliance','John S. and John L. Knight Foundation','Cynthia and George Mitchell Foundation','Atlantic Philanthropies','Council of State Governments','Leukemia & Lymphoma Society','John D and Catherine T. MacArthur Foundation','Unbound Philanthropy','Garfield Foundation','Freedom House','Wikimedia Foundation','AFDO','Getty Foundation','Altman Foundation','Colorado Trust','Jessie Ball duPont Fund','Arthur Vining Davis Foundations','The Christensen Fund','Rita Allen Foundation','NBA Legends','Trio Foundation of St. Louis','Surdna Foundation','Kresge Foundation','Carnegie Corporation of New York','Central Valley Community Foundation','Democracy Fund','Committee to Protect Journalists','American Cancer Society','Winthrop Rockefeller Foundation','Walter and Elise Haas Fund','ClimateWorks Foundation','Zellerbach Family Foundation','Hillman Family Foundations','Bosch Community Fund','The Scan Foundation','Hogg Foundation','Unitarian Universalist Service Committee','Whole Foods Market','Open Road Foundation','Max M. & Marjorie S. 
Fisher Foundation','ArtPlace America','Grace and Mercy Foundation','Alliance for Early Success','The New York Womens Foundation','DentaQuest','ECMC Foundation','Great Lakes Higher Education Guaranty','The J. Willard and Alice S. Marriott Foundation','Indiana Historical Society','Wallace H. Coulter Foundation'] oldlist=[] newlist=[] runStart = time.time() print params for job in db.fluxxJobs.find(): oldlist.append(job['jobkey']) for state in states: print state params['company'] = state for c in range(complevels): params['salary'] = "$" + str(c * 25) + "K-$" + str(((c+1) * 25)-1) + "K" if c == (complevels-1): params['salary'] = "$" + str(c*compincr) + "K" sr = indeedapi.search(**params) tr = sr['totalResults'] ps = params['salary'].replace("$","") for apirequests in range((tr/compincr)+1): params['start'] = (apirequests * compincr) sr = indeedapi.search(**params) for joblisting in sr['results']: jobListing = json.loads(json.dumps(joblisting)) newlist.append(jobListing['jobkey']) if joblisting['jobkey'] not in oldlist: newJobs += 1 listed = joblisting['date'].replace('GMT','UTC') joblisting['dateOpen'] = datetime.strftime(datetime.strptime(listed, '%a, %d %b %Y %H:%M:%S %Z'), '%Y%m%d %H:%M:%S') joblisting['datesOpen'] = timeDictStamp('') joblisting['_id'] = joblisting['jobkey'] joblisting['status'] = 'Open' joblisting['searchparams'] = params joblisting['searchparams']['procTime']=datetime.now().strftime("%Y-%m-%d %H:%M:%S") joblisting['searchparams']['totalResults'] = tr joblisting['compMin'] = c*25000 joblisting['compMax'] = (c+1) * 25000 joblisting['compRange'] = params['salary'] if joblisting['city'] == "": del joblisting['city'] if joblisting['state'] == "": del joblisting['state'] job = joblisting jobID=job['_id'] Title=job['jobtitle'][0:60].replace("'","") Company=job['company'].replace("'","").encode('latin-1','ignore') if 'source' in job: Source=job['source'].replace("'","") else: Source = '' 
Description=job['snippet'][0:250].replace("'","").encode('latin-1','ignore').replace("'","") Description = cleanup(Description,{'<b>':'','</b>':'','<B>':'','</B>':''}) listed = job['date'].replace('GMT','UTC') job['dateListed'] = datetime.strftime(datetime.strptime(listed, '%a, %d %b %Y %H:%M:%S %Z'), '%Y%m%d %H:%M:%S') CompRange_min=job['compMin'] CompRange_max=job['compMax'] CompRange=job['compRange'] textURL = "http://www.indeed.com/viewjob?jk=" + job['jobkey'] jd = {} jd['jdText'] = alchemy(textURL,'URLGetText','text') jd['jdConcepts'] = alchemy(textURL,'URLGetRankedConcepts','concepts') jd = json.loads(json.dumps(jd)) job['jobDescription'] = jd if not db.fluxxJobs.find_one({'jobkey':job['jobkey']}): db.grantsJobs.save(job) delisted = set(oldlist).difference(set(newlist)) for jobkey in delisted: expiredJobs+=1 rightnow = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") upResult = db.grantsJobs.update({'jobkey':jobkey},{'$set':{'dateClosed':rightnow,'datesClosed':timeDictStamp(),'status':'Closed','expired':'true'}}) print " " print 'FINISH:',str(time.asctime(time.localtime())) print '=================================================================================================='
class indeed:
    """Wrapper around the Indeed search API plus a CSV-backed similar-jobs ranker.

    skill()/skillOR() page through search results, cache them to sample.csv
    and return the raw result dicts; similarJobs() ranks cached rows by a
    hand-weighted similarity score against one chosen job.
    """

    def __init__(self):
        # self.jobDataFrame is created lazily by skill()/skillOR().
        self.client = IndeedClient(8836246992678581)

    def skill(self, l, city, jobtype):
        """Search jobs matching ALL skills in *l* (AND query) in *city*.

        BUG FIXES vs the original:
        * the pagination loop appended the just-fetched page, fetched the next
          page and appended that too, so every middle page entered ``results``
          twice (masked only partially by drop_duplicates later);
        * when totalResults <= 25 the loop never ran, so the first page was
          silently discarded and an empty frame crashed drop_duplicates.
        Each page is now appended exactly once, starting with the first.
        """
        print(jobtype)
        # Normalise the job-type argument to Indeed's 'jt' values.
        if jobtype in ['intern', 'internship', 'Internship']:
            jobtype = 'internship'
        else:
            jobtype = 'fulltime'
        params = {
            'q': " AND ".join(l),
            'l': city,
            'jt': jobtype,
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            'limit': "25",
            'start': 0,
            'highlight': 1
        }
        search_response = self.client.search(**params)
        results = []
        if (len(search_response['results']) <= 0):
            return results
        results += search_response['results']  # first page
        fetched = 25
        # Fetch further pages of 25 up to 100 results or the API total.
        while (fetched < 100 and fetched < search_response['totalResults']):
            params['start'] += 25
            search_response = self.client.search(**params)
            results += search_response['results']
            fetched += 25
            print(params['start'])
        self.jobDataFrame = pd.DataFrame(results).drop_duplicates('jobkey')
        self.jobDataFrame.to_csv("sample.csv", encoding='UTF-8')
        return results

    def skillOR(self, l, city, jobtype):
        """Search jobs matching ANY skill in *l* (OR query) in *city*.

        BUG FIX: the original params dict omitted 'start', so the first
        ``params['start'] += 25`` raised KeyError; 'start': 0 is now included.
        The same double-append/first-page pagination fixes as skill() apply.
        """
        print(jobtype)
        if jobtype in ['intern', 'internship', 'Internship']:
            jobtype = 'internship'
        else:
            jobtype = 'fulltime'
        params = {
            'q': " OR ".join(l),
            'l': city,
            'jt': jobtype,
            'userip': "1.2.3.4",
            'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
            'limit': "50",
            'start': 0
        }
        search_response = self.client.search(**params)
        results = []
        if (len(search_response['results']) <= 0):
            return results
        results += search_response['results']  # first page
        fetched = 25
        # NOTE(review): limit is "50" but the step is 25, as in the original;
        # confirm the intended page size.
        while (fetched < 100 and fetched < search_response['totalResults']):
            params['start'] += 25
            search_response = self.client.search(**params)
            results += search_response['results']
            fetched += 25
            print(params['start'])
        self.jobDataFrame = pd.DataFrame(results).drop_duplicates('jobkey')
        self.jobDataFrame.to_csv("sample.csv", encoding='UTF-8')
        return results

    def similarJobs(self, job):
        """Return up to 10 cached jobs most similar to job key *job*.

        Similarity = exact matches on city/company plus cosine similarity of
        snippet (x10) and title (x5), summed into a 'variance' score.
        NOTE(review): df.ix and DataFrame.sort are removed in modern pandas
        (use .loc and .sort_values); left untouched here since the pinned
        pandas version is unknown.
        """
        print("the job is" + job)
        sampledfo = pd.read_csv("sample.csv", encoding='UTF-8')
        sampledf = sampledfo.copy()
        # Drop columns irrelevant to similarity scoring.
        del sampledf['stations']
        del sampledf['Unnamed: 0']
        del sampledf['source']
        del sampledf['onmousedown']
        del sampledf['formattedLocation']
        del sampledf['formattedLocationFull']
        del sampledf['url']
        del sampledf['date']
        del sampledf['formattedRelativeTime']
        # Booleans arrive as the strings 'false'/'true' from CSV; binarise.
        sampledf['indeedApply'] = [
            0 if x == 'false' else 1 for x in sampledf['indeedApply']
        ]
        sampledf['expired'] = [
            0 if x == 'false' else 1 for x in sampledf['expired']
        ]
        sampledf['sponsored'] = [
            0 if x == 'false' else 1 for x in sampledf['sponsored']
        ]
        jobNo = job
        self.dataJob = sampledf.loc[sampledf['jobkey'] == jobNo]
        df = sampledf[sampledf["jobkey"] != jobNo]
        # 1/0 indicators for matching city (also overwrites country/state
        # columns, as in the original) and matching company.
        df.ix[df.city == self.dataJob.city.iloc[0], ['city', 'country', 'state']] = 1
        df.ix[df.city != 1, ['city', 'country', 'state']] = 0
        df.ix[df.company == self.dataJob.company.iloc[0], ['company']] = 1
        df.ix[df.company != 1, ['company']] = 0
        # Text similarity against the reference job's snippet and title.
        df['snippet'] = [
            textSim.cosine_sim(x, self.dataJob.snippet.iloc[0]) for x in df['snippet']
        ]
        df['jobtitle'] = [
            textSim.cosine_sim(x, self.dataJob.jobtitle.iloc[0]) for x in df['jobtitle']
        ]
        df['variance'] = df['city'] + df['company'] + df['country'] + df[
            'expired'] + df[
            'indeedApply'] + 10 * df['snippet'] + 5 * df['jobtitle']
        result = df.sort(['variance'], ascending=False)
        simList = result['jobkey'][:10].tolist()
        simDict = []
        for x in simList:
            s = sampledfo.loc[sampledfo['jobkey'] == x]
            simDict.append(s.to_dict(orient='records')[0])
        return simDict
def indeedAPI(defTask): params = {} params['userip'] = "1.2.3.4", params['useragent'] = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0)" params['start'] = 1 params['latlong'] = 1 params['as_ttl'] = '' params['limit'] = 25 params['fromage']='any' params['radius'] = 0 params['q'] = "grants+management" params['highlight'] = 0 params['jobtitle'] = '' compincr = 25 complevels = 11 indeedapi = IndeedClient(publisher='7423517030312598') print print 'START:',str(time.asctime(time.localtime())) newJobs = 0 expiredJobs = 0 states=["AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"] oldlist=[] newlist=[] runStart = time.time() for job in db.fluxxJobs.find(): oldlist.append(job['jobkey']) for state in states: print state, params['l'] = state for c in range(complevels): params['salary'] = "$" + str(c * 25) + "K-$" + str(((c+1) * 25)-1) + "K" if c == (complevels-1): params['salary'] = "$" + str(c*compincr) + "K" sr = indeedapi.search(**params) tr = sr['totalResults'] ps = params['salary'].replace("$","") for apirequests in range((tr/compincr)+1): params['start'] = (apirequests * compincr) sr = indeedapi.search(**params) for joblisting in sr['results']: jobListing = json.loads(json.dumps(joblisting)) newlist.append(jobListing['jobkey']) if joblisting['jobkey'] not in oldlist: newJobs += 1 listed = joblisting['date'].replace('GMT','UTC') joblisting['dateOpen'] = datetime.strftime(datetime.strptime(listed, '%a, %d %b %Y %H:%M:%S %Z'), '%Y%m%d %H:%M:%S') joblisting['datesOpen'] = timeDictStamp('') joblisting['_id'] = joblisting['jobkey'] joblisting['status'] = 'Open' joblisting['searchparams'] = params joblisting['searchparams']['procTime']=datetime.now().strftime("%Y-%m-%d %H:%M:%S") joblisting['searchparams']['totalResults'] = tr joblisting['compMin'] = c*25000 
joblisting['compMax'] = (c+1) * 25000 joblisting['compRange'] = params['salary'] if joblisting['city'] == "": del joblisting['city'] if joblisting['state'] == "": del joblisting['state'] job = joblisting jobID=job['_id'] Title=job['jobtitle'][0:60].replace("'","") Company=job['company'].replace("'","").encode('latin-1','ignore') if 'source' in job: Source=job['source'].replace("'","") else: Source = '' Description=job['snippet'][0:250].replace("'","").encode('latin-1','ignore').replace("'","") Description = cleanup(Description,{'<b>':'','</b>':'','<B>':'','</B>':''}) listed = job['date'].replace('GMT','UTC') job['dateListed'] = datetime.strftime(datetime.strptime(listed, '%a, %d %b %Y %H:%M:%S %Z'), '%Y%m%d %H:%M:%S') CompRange_min=job['compMin'] CompRange_max=job['compMax'] CompRange=job['compRange'] textURL = "http://www.indeed.com/viewjob?jk=" + job['jobkey'] jd = {} jd['jdText'] = alchemy(textURL,'URLGetText','text') jd['jdConcepts'] = alchemy(textURL,'URLGetRankedConcepts','concepts') jd = json.loads(json.dumps(jd)) job['jobDescription'] = jd if not db.fluxxJobs.find_one({'jobkey':job['jobkey']}): db.grantsJobs.save(job) delisted = set(oldlist).difference(set(newlist)) for jobkey in delisted: expiredJobs+=1 rightnow = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") upResult = db.grantsJobs.update({'jobkey':jobkey},{'$set':{'dateClosed':rightnow,'datesClosed':timeDictStamp(),'status':'Closed','expired':'true'}}) print " " print 'FINISH:',str(time.asctime(time.localtime())) print '=================================================================================================='
def fullmap():
    """Render a full-page Google map of 'python' jobs in Kharkiv.

    Geolocates the server/user via the Google geolocation API, fetches up to
    25 Indeed results, resolves each employer to coordinates with a Places
    text search, and renders everything as markers on a flask-googlemaps Map.
    """
    # NOTE(review): API keys are hard-coded here; move them to config/env.
    gmaps = googlemaps.Client(key="AIzaSyAx1j38VITDr2p2-VclAyX8pSOp7C_1-kM")
    # Approximate current position from Google's geolocation service.
    lctn = gmaps.geolocate()
    #reverse = gmaps.reverse_geocode(latlng = [lctn['location']['lat'],lctn['location']['lng']] )
    client = IndeedClient('1905750874242217')
    params = {
        'q': "python",
        'l': "Kharkiv",
        'userip': "1.2.3.4",
        'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        'co': "UA",       # country site: Ukraine
        'latlong': 1,     # ask Indeed to include coordinates in results
        'start': 0,
        'limit': 25
    }
    search_response = client.search(**params)
    # Flatten the JSON result list into a DataFrame for row iteration.
    jobs = json_normalize(search_response['results'])
    # First marker (blue): the detected current location.
    jobs_markers = [{
        'icon': '//maps.google.com/mapfiles/ms/icons/blue-dot.png',
        'lat': lctn['location']['lat'],
        'lng': lctn['location']['lng'],
        'infobox': "My Location"
    }]
    for index, row in jobs.iterrows():
        # Resolve the employer to coordinates with a Places text search,
        # biased toward the detected location.
        get_address = gmaps.places(query=row['company'] + ' ' + row['city'],
                                   location=str(lctn['location']['lat']) + ',' + str(lctn['location']['lng']))
        company = json_normalize(get_address['results'])
        # One red marker per place the Places search matched for this company.
        for index, row_company in company.iterrows():
            jobs_markers.append({
                'icon': '//maps.google.com/mapfiles/ms/icons/red-dot.png',
                'lat': row_company['geometry.location.lat'],
                'lng': row_company['geometry.location.lng'],
                'infobox': row['company'] + ' - ' + row_company['formatted_address'] + ' snippet:' + row['snippet']
            })
    #ltn = location()
    fullmap = Map(
        identifier="fullmap",
        varname="fullmap",
        style=("height:70%;"
               "width:99%;"
               "top:50;"
               "left:10;"
               "position:absolute;"
               "z-index:200;"),
        lat=lctn['location']['lat'],
        lng=lctn['location']['lng'],
        markers=jobs_markers,
        # maptype = "TERRAIN",
        zoom="11",
        #cluster=True
        fit_markers_to_bounds=True)
    return render_template('example_fullmap.html',
                           fullmap=fullmap,
                           GOOGLEMAPS_KEY=request.args.get('apikey'))
import json from indeed import IndeedClient from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker from database import Positions engine = create_engine('sqlite:///jobs.db', echo=True) # create a session Session = sessionmaker(bind=engine) session = Session() # publisher=5950869068484812 client = IndeedClient('5950869068484812') params = { 'q': "python", 'l': "Palo Alto", 'userip': "168.159.213.210", 'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4)", 'limit': "50", 'sort': "date", 'start': "0" } search_response = client.search(**params) print search_response # print search_response['results'] # use JSON editor online to view result # http://www.jsoneditoronline.org/ with open('indeed_positions_json.txt', 'w') as outfile: jobs = json.dump(search_response, outfile)