# NOTE(review): this physical line is a whitespace-collapsed fragment of a
# Python 2 script (statement-form `print`). Line breaks and indentation were
# lost, so the nesting described below is inferred from syntax -- confirm
# against an intact copy before editing the logic.
#
# Tail of the specialties section (the loop header that binds `li` is not
# visible here -- presumably a `for li in ...` over list items):
#   * every run of characters outside the class [0-9a-zA-Z ,-:] is removed
#     from `li.text`, the result is printed and appended to `specialties`
#     as a (therapist_id, text) tuple.
#   * NOTE(review): inside a character class `,-:` is a RANGE (comma through
#     colon), so '.', '/' and '-' are kept as well -- confirm that is intended
#     rather than a literal hyphen.
#   * if `specialties` is non-empty: when `exists` is truthy, db.replace() is
#     called on 'therapists' with `info`; otherwise db.insert() writes the
#     tuples to 'th_specialties' with multi=True.
#
# Issues section:
#   * newlines are stripped from driver.page_source and the text from
#     'Issues</h3>' up to the next '<h' is matched non-greedily; the match is
#     re-parsed with BeautifulSoup (no explicit parser argument is passed).
#   * each <li> text is printed, split on ', ', and every piece is appended
#     to `issues` as a (therapist_id, item) tuple.
#   * a UnicodeEncodeError raised while converting/splitting an <li> is
#     swallowed with `pass`, so that <li> is skipped without any message.
#   * storage mirrors the specialties section, targeting 'th_issues'.
#
# NOTE(review): in BOTH sections the `exists` branch passes `info` to
# db.replace() on 'therapists' and never uses the collected `specialties` /
# `issues` list -- looks like a copy-paste slip; verify against the db helper
# and the intended update behavior for already-known therapists.
#
# The line ends with the first statement of the following "MENTAL HEALTH"
# section; the rest of that section is outside this fragment.
x = str(re.sub('[^0-9a-zA-Z ,-:]+', '', li.text)) print x specialties.append((therapist_id, x)) if len(specialties) > 0: if exists: db.replace(c, 'therapists', info, ['therapist_id', 'pt_id', 'name', 'summary', 'phone']) else: db.insert(c, 'th_specialties', specialties, ['therapist_id', 'specialty'], multi=True) #extract issues focus print '\nISSUES' print '-------------------------' issues = [] text = re.search(r'Issues</h3>.*?<h', driver.page_source.replace('\n', '')) if text: text = BeautifulSoup(text.group(0)) for li in text.findAll('li'): print li.text try: for item in str(li.text).split(', '): issues.append((therapist_id, item)) except UnicodeEncodeError: pass if len(issues) > 0: if exists: db.replace(c, 'therapists', info, ['therapist_id', 'pt_id', 'name', 'summary', 'phone']) else: db.insert(c, 'th_issues', issues, ['therapist_id', 'issue'], multi=True) #extract mental health focus print '\nMENTAL HEALTH'