def search(self, row_h):
    """Search for one workplace and write the outcome to the result CSV.

    Every term produced by ``self.search_term_gen(row_h)`` is submitted via
    ``self._do_search`` until a result page comes back that does not
    contain the "gav ingen träff" (no-hit) marker.

    * If the last page searched still carries the marker, or if no search
      term was produced at all, a row with ``URL == "NOT FOUND"`` is
      written.
    * Every row yielded by ``self._scrape_result()`` is then tagged with
      the search name, the last term tried and today's date, and written
      to ``csv_hitta_eniro``.

    Args:
        row_h: mapping describing the workplace; ``PARENT_WORKPLACE_NAME``
            is read here, any other keys are consumed by
            ``search_term_gen``.
    """
    no_hit_marker = "gav ingen träff"

    search_term = None
    searched = False
    no_hit = True
    for search_term in self.search_term_gen(row_h):
        self._do_search(search_term)
        searched = True
        # Decode and classify the page once per search instead of
        # re-reading the response again after the loop.
        page = cp1252(self.mech.response().read().decode('utf-8'))
        no_hit = re.search(no_hit_marker, page) is not None
        if not no_hit:
            break

    today = datetime.datetime.today().strftime('%Y%m%d')

    if no_hit:
        not_found_h = {}
        not_found_h['SEARCH_NAME'] = row_h['PARENT_WORKPLACE_NAME']
        not_found_h['URL'] = "NOT FOUND"
        not_found_h['DATE'] = today
        csv_hitta_eniro.write_row_h(not_found_h)

    if not searched:
        # No term was generated, so the browser still holds whatever page
        # it had before this call; scraping it would attribute unrelated
        # results to this workplace.
        return

    # NOTE(review): as before, the page is scraped even after a NOT FOUND
    # row was written; presumably _scrape_result() yields nothing on a
    # no-hit page -- confirm.
    for result_h in self._scrape_result():
        result_h['SEARCH_NAME'] = row_h['PARENT_WORKPLACE_NAME']
        result_h['MATCHING_CRITERIA'] = search_term
        result_h['DATE'] = today
        csv_hitta_eniro.write_row_h(result_h)
def search(self, vrow_h, max_retries=5, retry_delay=1.0):
    """Search for one workplace and write the scraped result(s) to the CSV.

    Search terms are tried in this order until a page without the
    'inget resultat' (no-result) marker is returned:

    1. name + ',' + address
    2. name only
    3. "address, zip city"
    4. "address, city"

    If all four fail, a row with ``URL == 'NOT FOUND'`` is written and the
    method returns.  Otherwise every link from ``self._company_links()``
    is opened and scraped into its own row; when there are no such links
    the current page itself is scraped and recorded under ``self.url``.

    NOTE: for the special-cased company name the entry
    ``vrow_h['PARENT_WORKPLACE_NAME']`` is rewritten in place (after the
    first search), so the caller's mapping is modified.

    Args:
        vrow_h: mapping with the keys ``PARENT_WORKPLACE_NAME`` and
            ``PARENT_WORKPLACE_ADDRESS``; ``PARENT_WORKPLACE_ZIP`` and
            ``PARENT_WORKPLACE_CITY`` are only read if the corresponding
            fallback search is reached.
        max_retries: how many times a single company link is attempted
            before it is skipped (values below 1 are treated as 1).
        retry_delay: seconds to wait between two attempts on the same
            link.
    """
    import time  # local import: only needed for the retry back-off

    def no_result():
        # Looks for the no-result marker in the current response body.
        body = self.mech.response().read()
        return re.search('inget resultat', body) is not None

    def write_result(url, matching_criteria):
        # Scrape the page currently loaded in the browser into one CSV row.
        row_h = self._scrape_result()
        row_h['URL'] = url
        row_h['SEARCH_NAME'] = vrow_h['PARENT_WORKPLACE_NAME']
        row_h['MATCHING_CRITERIA'] = matching_criteria
        row_h['DATE'] = datetime.datetime.today().strftime('%Y%m%d')
        csv_hitta_eniro.write_row_h(row_h)

    search_term = (vrow_h['PARENT_WORKPLACE_NAME'] + ','
                   + vrow_h['PARENT_WORKPLACE_ADDRESS'])
    self._do_search(search_term)

    # Special case: later searches and the CSV use the short company name.
    if vrow_h['PARENT_WORKPLACE_NAME'] == 'Familjeläkarna i Sverige AB':
        vrow_h['PARENT_WORKPLACE_NAME'] = 'Familjeläkarna'

    # (extra log line, search-term template); templates are expanded only
    # when the fallback is actually reached, so keys of unused fallbacks
    # are never required.
    fallbacks = (
        (None,
         "%(PARENT_WORKPLACE_NAME)s"),
        ("Searching with address, zip city!!!!",
         "%(PARENT_WORKPLACE_ADDRESS)s, %(PARENT_WORKPLACE_ZIP)s "
         "%(PARENT_WORKPLACE_CITY)s"),
        ("Searching with address, city!!!!",
         "%(PARENT_WORKPLACE_ADDRESS)s, %(PARENT_WORKPLACE_CITY)s"),
    )

    found = not no_result()
    for announcement, template in fallbacks:
        if found:
            break
        logger.dump("NO RESULT FOUND!!")
        if announcement is not None:
            logger.dump(announcement)
        search_term = template % vrow_h
        self._do_search(search_term)
        found = not no_result()

    if not found:
        logger.dump("NO RESULT FOUND!! Skipping...")
        row_h = {}
        row_h['URL'] = 'NOT FOUND'
        row_h['SEARCH_NAME'] = vrow_h['PARENT_WORKPLACE_NAME']
        csv_hitta_eniro.write_row_h(row_h)
        return

    attempts = max(1, max_retries)
    link_found = False
    for link in self._company_links():
        link_found = True
        # Bounded retry: the previous unbounded loop never terminated on a
        # link that fails permanently.
        for attempt in range(1, attempts + 1):
            try:
                logger.dump("Result Get: " + link)
                self.mech.open(link)
            except Exception as error:
                # Broad on purpose: any failure to open the link is
                # logged and retried.
                logger.dump(error)
                if attempt < attempts:
                    time.sleep(retry_delay)
            else:
                break
        else:
            logger.dump("Giving up after %d attempts: %s" % (attempts, link))
            continue
        write_result(link, search_term)

    if not link_found:
        # The result page lists no company links: scrape the page itself.
        write_result(self.url, search_term)