Esempio n. 1
0
def report():
    """Render the job report page for the ``word`` query parameter.

    Reads ``word`` from ``request.args``. When it is missing or empty the
    client is redirected to ``/``. Otherwise the lower-cased word is looked
    up in ``db``; if that lookup yields a falsy value, ``get_jobs`` is called
    and its result is stored back under the same key. The jobs are then
    rendered through the ``report.html`` template.
    """
    query = request.args.get('word')
    if not query:
        # Nothing to search for: send the visitor back to the root URL.
        return redirect('/')

    word = query.lower()
    jobs = db.get(word)
    if not jobs:
        # A falsy value from db.get() counts as a miss and is fetched again.
        jobs = get_jobs(word)
        db[word] = jobs

    return render_template(
        "report.html",
        search=word,
        resultNumber=len(jobs),
        jobs=jobs,
    )
Esempio n. 2
0
from indeed import get_jobs
from save import save_to_file

# Call get_jobs() with no arguments and hand whatever it returns straight to
# save_to_file(). Both helpers come from the project-local `indeed` and `save`
# modules; presumably this scrapes listings and persists them -- confirm there.
indeed_jobs = get_jobs()
save_to_file(indeed_jobs)
Esempio n. 3
0
this file is used to build the initial json corpus
'''

# Search settings
# Each keyword is passed in turn as the first argument of indeed.get_jobs().
KEYWORD_FILTER = [
    'IT',
    'Software',
    'engineer',
    'developer',
    'scientist',
    'computer',
    'researcher',
    'technician',
    'data',
    'specialist',
    'designer',
]
# Passed as the second argument of indeed.get_jobs(); empty string here.
LOCATION_FILTER = ""

# Other settings
# MAX_PAGES_COMPANIES is passed as the third argument of indeed.get_jobs().
MAX_PAGES_COMPANIES = 1000
MAX_PAGES_REVIEWS = 100

import indeed

# Accumulate results across all keywords. ``jobs`` is handed to
# indeed.get_jobs() on every call, and ``id`` is threaded through: each call
# receives the previous return value and its own return value replaces it.
# NOTE(review): ``id`` shadows the builtin; the name is kept because it is a
# module-level name that other code may rely on.
jobs = {}
id = 0
for key in KEYWORD_FILTER:
    id = indeed.get_jobs(key, LOCATION_FILTER, MAX_PAGES_COMPANIES, id, jobs)
    if id > 3000:
        # Stop querying further keywords once the running value passes 3000.
        break

# Python 2 only: switch the interpreter-wide default codec to UTF-8,
# presumably so the non-ASCII text allowed by ensure_ascii=False can be
# written to the file below without a UnicodeEncodeError.
reload(sys)
sys.setdefaultencoding('utf-8')

# Serialize first so that a failure in json.dumps() does not leave a freshly
# truncated output file behind, then write inside a ``with`` block so the
# handle is closed even if the write raises.
jsontext = json.dumps(jobs, ensure_ascii=False, indent=4)
with open("jobcorpusupdate.json", "w+") as f:
    f.write(jsontext)
Esempio n. 4
0
def get_indeed(old):
    """Forward ``old`` to ``indeed.get_jobs`` and return its result unchanged."""
    jobs = indeed.get_jobs(old)
    return jobs
Esempio n. 5
0
# Monkey-patch: rebind httplib.HTTPSConnection to the replacement
# HTTPSConnection that is in scope here, so code that looks the class up
# through httplib picks up the replacement.
# NOTE(review): presumably the replacement pins ssl_version -- confirm
# against its definition.
httplib.HTTPSConnection = HTTPSConnection
# ssl_version corrections are done

""" 1) Scrap indeed.com for jobs listings
    2) Fill MongoDB with results and reviews from indeed
    3) For each company found, scrape glassdoor for additional reviews
"""

# Search settings
# The first two values are the leading arguments of indeed.get_jobs();
# KWFLAGS is passed as its last argument.
KEYWORD_FILTER = "Data Scientist"
LOCATION_FILTER = "Boston, MA"
KWFLAGS = [
    "Hadoop",
    "years experience",
    "years' experience",
    "years of experience",
]

# Other settings
# Passed to indeed.get_jobs() and indeed.get_all_company_reviews() respectively.
MAX_PAGES_COMPANIES = 500
MAX_PAGES_REVIEWS = 500

# DB settings
# MongoClient() is constructed with no arguments.
client = MongoClient()
# Use the ``indeed`` database of that client.
indeed_db = client.indeed
# Collection for job ads.
indeed_jobs = indeed_db.jobs
# Collection for company reviews.
indeed_reviews = indeed_db.reviews

"""1) scrap indeed for jobs"""
jobs = indeed.get_jobs(
    KEYWORD_FILTER,
    LOCATION_FILTER,
    indeed_jobs,
    MAX_PAGES_COMPANIES,
    KWFLAGS,
)

"""2) Get companies reviews into mongodb"""
indeed.get_all_company_reviews(
    jobs,
    indeed_reviews,
    MAX_PAGES_REVIEWS,
)