Ejemplo n.º 1
0
def getPostings(jobQuery, nURLs=1, start=0):
    """
    Fetch the postings for a job query and collect each one's job key and
    word list.

    return: (jobkeys, allwords)
            jobkeys  | list[string] | unique ID of each job posting
            allwords | list[list[string]] | words of each job posting, in the
                                            same order as jobkeys
    params:
         jobQuery: string | job search query (required; passed through to
                            indeed.getJobURLs)
            nURLs: int | number of job posting URLs to request (default=1)
            start: int | index to begin api url search (default=0)
    """
    # retrieve list of URL's for jobQuery
    urls = indeed.getJobURLs(jobQuery, nURLs=nURLs, start=start)

    # parallel lists: jobkeys[i] and allwords[i] describe the same posting
    jobkeys, allwords = [], []

    for url in urls:
        # parseJobPosting yields a 5-item record; position, company and
        # location are not needed for the skillrank database, so they are
        # unpacked into underscore-prefixed names and ignored
        jobkey, _position, _company, _location, words = \
            indeed.parseJobPosting(url)

        jobkeys.append(jobkey)
        allwords.append(words)

    return jobkeys, allwords
Ejemplo n.º 2
0
def getPostings(jobQuery, nURLs=1, start=0):
    """
    Fetch the postings for a job query and collect each one's job key and
    word list.

    return: (jobkeys, allwords)
            jobkeys  | list[string] | unique ID of each job posting
            allwords | list[list[string]] | words of each job posting, in the
                                            same order as jobkeys
    params:
         jobQuery: string | job search query (required; passed through to
                            indeed.getJobURLs)
            nURLs: int | number of job posting URLs to request (default=1)
            start: int | index to begin api url search (default=0)
    """
    # retrieve list of URL's for jobQuery
    urls = indeed.getJobURLs(jobQuery, nURLs=nURLs, start=start)

    # parallel lists: jobkeys[i] and allwords[i] describe the same posting
    jobkeys, allwords = [], []

    for url in urls:
        # parseJobPosting yields a 5-item record; position, company and
        # location are not needed for the skillrank database, so they are
        # unpacked into underscore-prefixed names and ignored
        jobkey, _position, _company, _location, words = \
            indeed.parseJobPosting(url)

        jobkeys.append(jobkey)
        allwords.append(words)

    return jobkeys, allwords
Ejemplo n.º 3
0
def getResults(jobQuery, nJobs, start=0):
    """
    Scrape the postings for a job query and rank their terms with analyze().

    return: (results, biResults, resultsString)
            results       | first value returned by analyze(); documented by
                            the original author as
                            list[tuple(term,relevance,count)]
            biResults     | second value returned by analyze()
                            (presumably the two-word-term ranking; confirm
                            against analyze)
            resultsString | string | summary of how many words and postings
                                     were used
            When no URL matches the query the function returns ([], [], '').
    params:
         jobQuery: string | job query from user form
            nJobs: int | number of jobs to consider
            start: int | index to start indeed.com api search (default=0)
    """
    # connect to the skillrank database; the try/finally blocks below make
    # sure the cursor and connection are released on every exit path,
    # including the early "no URLs" return and any exception raised while
    # scraping or analyzing
    con = mdb.connect(host='localhost', user='******', db='skillrank')
    try:
        cur = con.cursor()
        try:
            # retrieve URL's for jobQuery
            urls = indeed.getJobURLs(jobQuery, nURLs=nJobs, start=start)

            # if no URL's matched for jobQuery
            if not urls:
                return [], [], ''

            # get indeed job postings using threads for boosted efficiency
            documents = indeed.threadResults(urls, nThreads=8)

            # the words list is the last item of each document tuple;
            # flatten them all into a single list of terms
            terms = []
            for d in documents:
                terms.extend(d[-1])

            # retrieve ranked results
            results, biResults = analyze(
                cur, jobQuery, terms, x=0.6, nReturn=100, threshold=1)
        finally:
            cur.close()
    finally:
        con.close()

    # create the results string
    resultsString  = 'Based on '+str(len(terms))+' words scraped from '
    resultsString += str(len(urls))+' job postings for "'+jobQuery+'"'

    return results, biResults, resultsString