def parse(self,response): db = DbOperations() url = response.url url = url.replace("http://jobsearch.naukri.com/","").replace("-jobs","") urlCompany = re.sub("-[0-9]","",url) pp = pprint.PrettyPrinter(indent=4) global companyCombo for comp_id in sorted(companyCombo.iterkeys()): if urlCompany in companyCombo[comp_id]: fk_comp_id = comp_id sel = Selector(response) try: jobs = sel.xpath('//div[contains(@class,"row")]').extract() jobAttr = {} for i in range(1,51): elementParser = BeautifulSoup(jobs[i]) try: jobAttr['companyName'] = db.cleanSpacesAndCharacters(elementParser.find("span",itemprop="hiringOrganization").getText()) except: continue if(jobAttr['companyName'].lower().find(urlCompany.lower()) == 0): jobAttr['title'] = db.cleanSpacesAndCharacters(elementParser.find("span",itemprop="title").getText()) jobAttr['jobLocation'] = db.cleanSpacesAndCharacters(elementParser.find("span",itemprop="jobLocation").getText()) jobAttr['experience'] = db.cleanSpacesAndCharacters(elementParser.find("span",itemprop="experienceRequirements").getText()) jobAttr['salary'] = db.cleanSpacesAndCharacters(elementParser.find("span",itemprop="baseSalary").getText()) sql = """SELECT * FROM naukri_jobs_3 WHERE fk_company_id = '"""+str(fk_comp_id)+"""' AND jobtitle = '"""+jobAttr['title']+"""' AND location = '"""+jobAttr['jobLocation']+"""'""" result = db.executeQuery(sql) if result: return try: jobAttr['jobSnippet'] = db.cleanSpacesAndCharacters(elementParser.find("span",itemprop="description").getText()) except: try: jobAttr['jobSnippet'] = db.cleanSpacesAndCharacters(elementParser.find("div",class_="more").getText()) except: continue try: jobAttr['source'] = db.cleanSpacesAndCharacters(elementParser.find("div",class_ = "rec_details").getText()) except: jobAttr['source'] = jobAttr['companyName'] jobUrl = elementParser.find("a").get("href") jobAttr['jobUrl'] = jobUrl try: jobPage = urllib2.urlopen(jobUrl).read() jobDescriptionParser = BeautifulSoup(jobPage) try: jobAttr['jobDescription'] = ' '.join(jobDescriptionParser.find("ul",itemprop="description").getText().replace("\t","").replace("\n","").split()).replace("'","") except: try: jobAttr['jobDescription'] = ' '.join(jobDescriptionParser.find("div",class_="f14 lh18 alignJ disc-li").getText().replace("\t","").replace("\n","").split()).replace("'","") except: try: jobAttr['jobDescription'] = jobDescriptionParser.find("meta",{"property":"og:description"}) jobAttr['jobDescription'] = db.cleanSpacesAndCharacters(jobAttr['jobDescription']['content']) except: try: jobAttr['jobDescription'] = ' '.join(jobDescriptionParser.findAll("td",{"class":"detailJob"})[2].getText().replace("\t","").replace("\n","").split()).replace("'","") except: jobAttr['jobDescription'] = jobAttr['jobSnippet'] except: print "hello" sql = """INSERT INTO naukri_jobs_4 SET jobtitle = '"""+jobAttr['title']+"""', snippet = '"""+jobAttr['jobSnippet']+"""', location = '"""+jobAttr['jobLocation']+"""', naukri_company_name = '"""+jobAttr['companyName']+"""', fk_company_id = '"""+str(fk_comp_id)+"""', job_url = '"""+smart_str(jobAttr['jobUrl'])+"""', experience = '"""+smart_str(jobAttr['experience'])+"""', salary = '"""+smart_str(jobAttr['salary'])+"""', full_description = '"""+smart_str(MySQLdb.escape_string(jobAttr['jobDescription']))+"""', source = '"""+jobAttr['source']+"""'""" db.executeQuery(sql) else: # print "hello" continue except: return