Beispiel #1
0
		x = str(re.sub('[^0-9a-zA-Z ,-:]+', '', li.text))
		print x
		specialties.append((therapist_id, x))
	# Persist the (therapist_id, specialty) tuples collected in `specialties`,
	# but only when at least one was found.
	if len(specialties) > 0:
		if exists:
			# NOTE(review): this branch rewrites the 'therapists' row from `info`
			# and never uses `specialties` -- it looks like a copy-paste from the
			# therapist-info step. Confirm whether 'th_specialties' should be
			# refreshed here instead.
			db.replace(c, 'therapists', info, ['therapist_id', 'pt_id', 'name', 'summary', 'phone'])
		else:
			# multi=True presumably asks the db helper for a multi-row insert of
			# the whole list -- verify against the helper's definition.
			db.insert(c, 'th_specialties', specialties, ['therapist_id', 'specialty'], multi=True)

	# Extract the "Issues" focus list from the rendered page: print each list
	# entry, collect (therapist_id, issue) tuples in `issues`, then hand them to
	# the db helper.
	print '\nISSUES'
	print '-------------------------'
	issues = []
	# Newlines are stripped first because no DOTALL flag is passed, so `.` would
	# not match across line breaks. The non-greedy match ends at the first '<h'
	# that follows the 'Issues</h3>' heading.
	text = re.search(r'Issues</h3>.*?<h', driver.page_source.replace('\n', ''))
	if text:
		# Re-parse only the matched fragment so the <li> lookup is limited to it.
		text = BeautifulSoup(text.group(0))
		for li in text.findAll('li'):
			print li.text
			try:
				# A single <li> may list several issues separated by ', '.
				# str() runs once before the loop, so if it raises, none of this
				# entry's items are appended.
				for item in str(li.text).split(', '):
					issues.append((therapist_id, item))
			except UnicodeEncodeError:
					# Best-effort: an entry whose text str() cannot convert
					# (presumably non-ASCII text under Python 2) is skipped silently.
					pass
		if len(issues) > 0:
			if exists:
				# NOTE(review): this branch rewrites the 'therapists' row from
				# `info` and never uses `issues` -- it looks like a copy-paste from
				# the therapist-info step. Confirm whether 'th_issues' should be
				# refreshed here instead.
				db.replace(c, 'therapists', info, ['therapist_id', 'pt_id', 'name', 'summary', 'phone'])
			else:
				# multi=True presumably asks the db helper for a multi-row insert
				# of the whole list -- verify against the helper's definition.
				db.insert(c, 'th_issues', issues, ['therapist_id', 'issue'], multi=True)

	#extract mental health focus
	print '\nMENTAL HEALTH'