def getPostings(jobQuery, nURLs=1, start=0):
    """
    Collect the job key and word list of every posting found for a query.

    return:
        jobkeys  | list | job key of each posting, in URL order
                          (documented upstream as unique string IDs)
        allwords | list | words value of each posting, in URL order
                          (documented upstream as a list of strings each)
    params:
        jobQuery: job search query, forwarded to indeed.getJobURLs
        nURLs:    int | number of posting URLs to request (default=1)
        start:    int | index at which the URL search begins (default=0)
    """
    # NOTE(review): an identical getPostings appears right after this one in
    # the visible source; if both live in the same module the later one wins.
    jobkeys = []
    allwords = []

    # ask the indeed helper for the posting URLs matching the query
    postingURLs = indeed.getJobURLs(jobQuery, nURLs=nURLs, start=start)

    for postingURL in postingURLs:
        # each parsed posting is a 5-item sequence; only the first (job key)
        # and last (words) items are needed for the skillrank database
        key, _position, _company, _location, tokens = indeed.parseJobPosting(
            postingURL)
        jobkeys.append(key)
        allwords.append(tokens)

    return jobkeys, allwords
def getPostings(jobQuery, nURLs=1, start=0):
    """
    Scrape the postings for a job query and return their keys and words.

    return:
        jobkeys  | list | one job key per posting URL, in URL order
                          (documented upstream as unique string IDs)
        allwords | list | one words value per posting URL, in URL order
                          (documented upstream as a list of strings each)
    params:
        jobQuery: job search query, passed through to indeed.getJobURLs
        nURLs:    int | how many posting URLs to request (default=1)
        start:    int | index to begin the URL search at (default=0)
    """
    # NOTE(review): this repeats a getPostings definition that appears just
    # before it in the visible source; consider keeping only one of them.
    allwords, jobkeys = [], []

    # walk the URLs returned for this query one posting at a time
    for link in indeed.getJobURLs(jobQuery, nURLs=nURLs, start=start):
        # unpack all five fields so a malformed posting still fails loudly;
        # position/company/location are not needed by the skillrank database
        postingKey, _pos, _comp, _loc, postingWords = \
            indeed.parseJobPosting(link)
        jobkeys.append(postingKey)
        allwords.append(postingWords)

    return jobkeys, allwords
def getResults(jobQuery, nJobs, start=0):
    """
    Scrape job postings for a query and rank the terms found in them.

    return: tuple (results, biResults, resultsString)
        results       | first value returned by analyze(); documented
                        upstream as list[tuple(term, relevance, count)]
        biResults     | second value returned by analyze()
                        (presumably the bigram ranking -- TODO confirm)
        resultsString | string | human-readable summary of how many words
                        and postings the ranking is based on
        When no URL matches the query the result is ([], [], '').
    params:
        jobQuery: string | job query from user form
        nJobs:    int | number of jobs to consider
        start:    int | index to start indeed.com api search (default=0)
    """
    # connect to the skillrank database; the try/finally blocks below make
    # sure the cursor and connection are released on every exit path,
    # including the early "no URLs" return and any exception while scraping
    con = mdb.connect(host='localhost', user='******', db='skillrank')
    try:
        cur = con.cursor()
        try:
            # retrieve URL's for jobQuery
            urls = indeed.getJobURLs(jobQuery, nURLs=nJobs, start=start)

            # nothing matched: same empty result as before, but the
            # finally clauses now close the cursor and connection first
            if not urls:
                return [], [], ''

            # get indeed job postings using threads for boosted efficiency
            documents = indeed.threadResults(urls, nThreads=8)

            # the words list is the last item of each document tuple;
            # flatten them all into a single list of terms
            terms = []
            for d in documents:
                terms += d[-1]

            # retrieve ranked results
            results, biResults = analyze(cur, jobQuery, terms, x=0.6,
                                         nReturn=100, threshold=1)
        finally:
            cur.close()
    finally:
        con.close()

    # create the results string
    resultsString = 'Based on '+str(len(terms))+' words scraped from '
    resultsString += str(len(urls))+' job postings for "'+jobQuery+'"'

    return results, biResults, resultsString