def translate(company_name):
    """Translate the pending non-English jobs of one company into English.

    Jobs are taken one at a time from the ``company_name`` collection while
    any document still has ``translated == "false"``.  The language is
    detected from ``job_summary``; if it is not ``'en'`` the ``city``,
    ``snippet`` and ``job_summary`` fields are translated.  Each processed
    job is then marked ``translated: "true"`` (also when it was already
    English or its summary could not be translated), which is what lets the
    loop make progress.

    Args:
        company_name: Name of the MongoDB collection that holds the jobs.

    NOTE(review): jobs are addressed by ``url``; presumably it is unique
    within the collection -- if it is not, the update may hit a different
    document and the same job would be fetched again. Confirm.
    """
    conn = getMongoConnection()
    collection = conn[config.Mongo_DB_NAME][company_name]
    pending = {"translated": "false"}

    while True:
        # One find_one() per iteration replaces the former count() +
        # find_one() pair: half the round trips, and no window in which the
        # count is positive but find_one() already returns None.
        job = collection.find_one(pending)
        if job is None:
            break

        summary = job['job_summary']
        try:
            lan = detect(unicode(summary, "utf-8"))
        except Exception:
            # Decoding fails for text that is already unicode (and for
            # invalid UTF-8); fall back to detecting on the raw value.
            lan = detect(summary)
        print(lan)
        print(job)

        # All field changes are collected and written in a single $set so a
        # job is never left half-translated in the database.
        changes = {}
        if str(lan) != 'en':
            for key in ("city", "snippet"):
                if key in job:
                    changes[key] = translator.translate(job[key],
                                                        lang_to="en")

            if "job_summary" in job:
                flag = 0
                translated = None
                try:
                    translated = translator.translate(
                        unicode(summary, "utf-8"), lang_to="en")
                    flag = 1
                except Exception:
                    # Second attempt: strip everything non-ASCII and retry.
                    try:
                        translated = translator.translate(
                            summary.encode("ascii", "ignore"), lang_to="en")
                        flag = 1
                    except Exception:
                        # Previously this message sat in the try's ``else``
                        # clause, i.e. it was printed on success and never
                        # on failure.
                        print("couldn't translate %s" % job['url'])
                print(flag)
                if flag == 1:
                    changes["job_summary"] = translated

        changes["translated"] = "true"
        collection.update({"url": job["url"]}, {"$set": changes},
                          upsert=False, multi=False)
def translate(company_name):
    """Translate the flagged non-English jobs of one company into English.

    Jobs are taken one at a time from the ``company_name`` collection while
    any document still has ``translated == True``.  The language is detected
    from ``snippet``; if it is not ``'en'`` the ``city``, ``snippet`` and
    ``job_summary`` fields are translated.  Each processed job then gets
    ``translated`` set to ``False`` (also when it was already English or its
    summary could not be translated), which is what lets the loop make
    progress.

    Args:
        company_name: Name of the MongoDB collection that holds the jobs.

    NOTE(review): selecting ``translated == True`` and clearing it to
    ``False`` suggests the field means "needs translation" here -- confirm
    against whatever writes that field.
    NOTE(review): jobs are addressed by ``url``; presumably it is unique
    within the collection -- confirm.
    """
    conn = getMongoConnection()
    db = conn[config.Mongo_DB_NAME]
    print(db)
    print("something yar")
    collection = db[company_name]
    pending = {"translated": True}

    while True:
        # One find_one() per iteration replaces the former count() +
        # find_one() pair: half the round trips, and no window in which the
        # count is positive but find_one() already returns None.
        job = collection.find_one(pending)
        if job is None:
            break

        lan = detect(job['snippet'])

        # All field changes are collected and written in a single $set so a
        # job is never left half-translated in the database.
        changes = {}
        if str(lan) != 'en':
            for key in ("city", "snippet"):
                if key in job:
                    changes[key] = translator.translate(job[key],
                                                        lang_to="en")

            if "job_summary" in job:
                flag = 0
                translated = None
                summary = job["job_summary"]
                try:
                    translated = translator.translate(
                        unicode(summary, "utf-8"), lang_to="en")
                    flag = 1
                except Exception:
                    # Decoding fails for text that is already unicode; retry
                    # with the value exactly as stored.
                    try:
                        translated = translator.translate(summary,
                                                          lang_to="en")
                        flag = 1
                    except Exception:
                        # Previously this message sat in the try's ``else``
                        # clause, i.e. it was printed on success and never
                        # on failure.
                        print("couldn't translate %s" % job['url'])
                print(flag)
                if flag == 1:
                    changes["job_summary"] = translated

        changes["translated"] = False
        collection.update({"url": job["url"]}, {"$set": changes},
                          upsert=False, multi=False)
def tag_jobs(self, company_name):
    """Fill in the ``BU`` field of every job of a company that lacks one.

    Jobs whose ``BU`` is the empty string are processed one at a time.  A
    job whose ``jobtitle`` is rejected by ``self.to_ignore`` is tagged
    ``"invalid"``; any other job is tagged with the value returned by
    ``self.calculate_tag`` for its ``job_summary``.

    Args:
        company_name: Name of the MongoDB collection that holds the jobs.

    NOTE(review): the loop only ends once no document has ``BU == ""``;
    presumably ``calculate_tag`` never returns an empty string and ``url``
    is unique per job -- confirm, otherwise the same job is fetched forever.
    """
    mongo = connection.getMongoConnection()
    collection = mongo[config.Mongo_DB_NAME][company_name]
    untagged = {"BU": ""}

    while True:
        # One find_one() per iteration replaces the former count() +
        # find_one() pair: half the round trips, and no window in which the
        # count is positive but find_one() already returns None.
        job = collection.find_one(untagged)
        if job is None:
            break

        if self.to_ignore(job['jobtitle']):
            bu = "invalid"
        else:
            bu = self.calculate_tag(company_name, job["job_summary"])

        collection.update({"url": job['url']}, {"$set": {"BU": bu}},
                          upsert=False, multi=False)
def __init__(self):
    """Obtain a MongoDB connection and select the configured database.

    Stores the object returned by ``connection.getMongoConnection()`` as
    ``self.conn`` and the database named ``config.Mongo_DB_NAME`` on that
    connection as ``self.db``.
    """
    mongo_client = connection.getMongoConnection()
    self.conn = mongo_client
    self.db = mongo_client[config.Mongo_DB_NAME]
def __init__(self, company_name, key):
    """Prepare the Indeed search settings and MongoDB handles for a company.

    Args:
        company_name: Company to search for; embedded in the query string
            as ``company:(<company_name>)``.
        key: Stored unchanged as ``self.key`` (presumably the Indeed API
            key -- confirm against the code that sends the request).
    """
    self.key = key
    self.endpoint = 'http://api.indeed.com/ads/apisearch'
    self.query = "company:({0})".format(company_name)

    mongo_client = connection.getMongoConnection()
    self.conn = mongo_client
    self.db = mongo_client[Mongo_DB_NAME]