def write_ttl(self):
    """Scrape the faculty listing at ``self._link`` into a TTL file.

    Each ``<tr class="FacultyTableRow">`` row must contain an ``<img>`` and
    an ``<a>``.  The link target (prefixed with ``self._flink``) is handed
    to ``self._parse_prof_site``; the entry it returns is given the picture
    and website URLs and written to the output file.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    for row in page.findAll('tr', {"class": "FacultyTableRow"}):
        img_src = row.select('img')[0]['src']
        href = row.select('a')[0]['href']
        picture_url = self._flink + img_src
        profile_url = self._flink + href

        entry = self._parse_prof_site(profile_url)
        entry.picture = picture_url
        entry.website = profile_url
        entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the faculty table at ``self._link`` into a TTL file.

    The first two ``<tr>`` rows are skipped.  For every other row the name
    is the text of the first ``<strong>``, the title is the node following
    the first ``<br>``, the third ``<p>`` holds e-mail / phone / room on
    separate lines, and the fourth ``<p>`` holds the interests.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    for row in page.select('tr')[2:]:
        full_name = row.find('strong').getText()
        job_title = row.find('br').next_sibling

        paragraphs = row.select('p')
        contact_lines = paragraphs[2].getText().split('\n')
        email = contact_lines[0]
        phone = contact_lines[1]
        room = contact_lines[2]
        interests = paragraphs[3].getText()

        entry = ttl.TtlFileEntry()
        entry.name = full_name
        entry.property = "faculty"
        entry.title = job_title
        entry.email = email
        entry.phone = phone
        entry.room = room
        # NOTE(review): attribute is spelled with a capital "I"; confirm
        # that this is the name the TTL entry expects.
        entry.Interests = interests
        entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the faculty table at ``self._link`` into a TTL file.

    The first ``<tr>`` is skipped.  Each remaining row is read through its
    ``<div>`` children: the first holds the picture, the second the name
    and profile link, followed by title, department, interests, e-mail,
    phone and room.  Labelled values are taken as the text after the first
    ``:``.  The entry itself comes from ``self._parse_prof_site`` and is
    then filled in with the values found in the row.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    for row in page.select('tr')[1:]:
        cells = row.select('div')
        img_src = cells[0].select('img')[0]['src']
        href = cells[1].select('a')[0]['href']
        profile_url = self._flink + href
        texts = [cell.getText() for cell in cells]

        entry = self._parse_prof_site(profile_url)
        entry.picture = self._flink + img_src
        entry.name = texts[1]
        entry.property = "faculty"
        entry.title = texts[2]
        entry.department = texts[3]
        entry.interests = texts[4].split(':')[1]
        entry.email = texts[5].split(":")[1]
        # Phone and room cells may be whitespace-only; those are left unset.
        if not texts[6].isspace():
            entry.phone = texts[6].split(":")[1]
        if not texts[7].isspace():
            entry.room = texts[7].split(":")[1]
        entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the faculty tables at ``self._link`` into a TTL file.

    The first ``<article>`` and the first ``<tr>`` of every other article
    are skipped.  In each remaining row the first ``<td>`` is the name; the
    second and third cells are each split around their first ``<br>``:
    title / department and e-mail / phone respectively.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    for article in page.select('article')[1:]:
        for row in article.select('tr')[1:]:
            cells = row.select('td')

            entry = ttl.TtlFileEntry()
            entry.name = cells[0].getText()
            entry.property = "faculty"

            position_break = cells[1].select('br')[0]
            entry.title = position_break.previous_sibling
            entry.department = position_break.next_sibling

            contact_break = cells[2].select('br')[0]
            # The node before the <br> is an element (getText is called on
            # it); the node after it is stored as-is.
            entry.email = contact_break.previous_sibling.getText()
            entry.phone = contact_break.next_sibling
            entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the faculty table at ``self._link`` into a TTL file.

    Every ``<tr>`` must contain an ``<img>``.  The row's stripped text is
    split on newlines and read positionally: name, title, phone, e-mail.
    Phone and e-mail are only stored when their line is not pure
    whitespace.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    for row in page.select('tr'):
        img_src = row.select('img')[0]['src']
        fields = row.getText().strip().split('\n')

        entry = ttl.TtlFileEntry()
        entry.picture = self._flink + img_src
        entry.property = "faculty"
        entry.name = fields[0]
        entry.title = fields[1]
        if not fields[2].isspace():
            entry.phone = fields[2]
        if not fields[3].isspace():
            entry.email = fields[3]
        entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the faculty table at ``self._link`` into a TTL file.

    The first two and the last fourteen ``<tr>`` rows are skipped
    (presumably page chrome rather than faculty rows -- confirm against the
    live page).  For each remaining row: the name is the first
    ``<strong>``, the title is the text after the first ``<br>``, the
    second ``<td>`` lists e-mail / phone / room on lines 1-3 of its text,
    the third ``<td>`` holds the interests, and the first ``<img>`` is the
    picture.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    rows = page.select('tr')
    # range() rather than a slice: a negative stop yields no rows at all.
    for index in range(2, len(rows) - 14):
        row = rows[index]
        entry = ttl.TtlFileEntry()

        full_name = row.find('strong').getText().strip()
        job_title = row.br.next_sibling.strip()

        cells = row.select('td')
        contact_lines = cells[1].getText().splitlines()
        # Contact details are optional; empty lines leave the field unset.
        if contact_lines[1]:
            entry.email = contact_lines[1].strip()
        if len(contact_lines) > 2 and contact_lines[2]:
            entry.phone = contact_lines[2].strip()
        if len(contact_lines) > 3 and contact_lines[3]:
            entry.room = contact_lines[3].strip()

        interests = cells[2].getText().strip()
        img_src = row.select('img')[0]['src'].strip()
        picture_url = self._flink + img_src

        entry.name = full_name
        entry.property = "faculty"
        entry.title = job_title
        entry.interests = interests
        entry.picture = picture_url
        entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Convert the building CSV at ``self.csv_filename`` into a TTL file.

    One ``building`` entry is written per CSV row.  The ``Name`` column may
    hold several ``;``-separated names: the first becomes the entry name,
    the rest -- followed by whatever ``self.filter_common_names`` returns
    for them -- become alternate names.  A non-empty ``Code`` is added to
    the alternate names both verbatim and title-cased.  Opening and closing
    hours are copied from the ``<Day>StartTime`` / ``<Day>EndTime`` columns
    to the matching ``<day>StartTime`` / ``<day>EndTime`` attributes.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.

    Raises:
        KeyError: if a row lacks one of the expected columns.
    """
    # CSV column prefixes; the entry attribute uses the lower-cased form.
    day_prefixes = ("M", "T", "W", "Th", "F", "Sa", "Su")

    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks.
    with open(self.csv_filename, 'r', newline='') as f:
        ttl_file = ttl.TtlFile(self.ttl_filename)
        for row in csv.DictReader(f):
            building = ttl.TtlFileEntry()
            building.property = "building"

            names = row["Name"].split(';')
            names = names + self.filter_common_names(names)
            building.name = names[0]
            building.altnames = names[1:]
            if row["Code"]:
                building.altnames.append(row["Code"])
                building.altnames.append(row["Code"].title())

            building.department = row["Function"]
            building.address = row["Address"]
            building.picture = row["Picture"]
            building.website = row["Website"]

            # All start times first, then all end times, Monday..Sunday.
            for suffix in ("StartTime", "EndTime"):
                for day in day_prefixes:
                    setattr(building, day.lower() + suffix, row[day + suffix])

            building.write_to(ttl_file)
        ttl_file.close()
    return ttl_file
def write_ttl(self):
    """Scrape the faculty table at ``self._link`` into a TTL file.

    The first two and the last six ``<tr>`` rows are skipped.  For each
    remaining row the name is the first ``<strong>``, the title is the text
    after the first ``<br>``, and the third to sixth ``<p>`` elements hold
    e-mail, phone, room and interests in that order.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    rows = page.select('tr')
    # range() rather than a slice: a negative stop yields no rows at all.
    for index in range(2, len(rows) - 6):
        row = rows[index]
        full_name = row.find('strong').getText()
        job_title = row.br.next_sibling.strip()

        paragraphs = row.select('p')
        email = paragraphs[2].getText()
        phone = paragraphs[3].getText()
        room = paragraphs[4].getText()
        interests = paragraphs[5].getText().strip()

        entry = ttl.TtlFileEntry()
        entry.name = full_name
        entry.property = "faculty"
        entry.title = job_title
        entry.email = email
        entry.phone = phone
        entry.room = room
        # NOTE(review): attribute is spelled with a capital "I"; confirm
        # that this is the name the TTL entry expects.
        entry.Interests = interests
        entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the faculty listing at ``self._link`` into a TTL file.

    Each ``<tr class="FacultyTableRow">`` is read through its
    ``facultyHeadshot`` and ``fac-info`` divs.  The ``fname`` heading is
    split at the first comma into name and (optional) degree.  Inside the
    ``fcontact`` div, child 1 is the title and child 3 holds office,
    e-mail and phone on consecutive lines.  The department is the first
    node inside the row's second ``<td>``.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    for row in page.findAll('tr', {"class": "FacultyTableRow"}):
        headshot = row.find('div', {"class": "facultyHeadshot"}).img
        info = row.find('div', {"class": "fac-info"})
        name_parts = info.find('h2', {"class": "fname"}).getText().split(",", 1)
        contact = info.find('div', {"class": "fcontact"})
        location_lines = contact.contents[3].getText().split("\n")

        entry = ttl.TtlFileEntry()
        # The headshot div may have no <img>; the picture is then omitted.
        if headshot is not None:
            entry.picture = "http://drexel.edu" + headshot['src']
        entry.name = name_parts[0]
        entry.property = "faculty"
        if len(name_parts) > 1:
            entry.degree = name_parts[1]
        entry.title = contact.contents[1].getText()
        entry.office = location_lines[0]
        entry.email = location_lines[1]
        entry.phone = location_lines[2]
        entry.department = row.find_all('td')[1].next_element
        entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the faculty table at ``self._link`` into a TTL file.

    Only rows of the first ``<tbody>`` are read, and its first two rows are
    skipped.  Cell 0 supplies the picture (``<img>``), name (``<h1>``) and
    title (``<h2>``); cell 1 supplies the e-mail (first ``<a>``) and the
    phone (text after the first ``<br>``, taken between the first ``:`` and
    the next newline); cell 2 is the interests text.  No room is extracted.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    body = page.select('tbody')[0]
    for row in body.select('tr')[2:]:
        cells = row.select("td")

        img_src = cells[0].find('img')['src']
        picture_url = self._flink + img_src
        full_name = cells[0].find('h1').getText()
        job_title = cells[0].find('h2').getText()
        email = cells[1].find('a').getText()

        raw_phone = cells[1].find('br').next_sibling
        phone = raw_phone.split(":")[1].split('\n')[0]

        interests = cells[2].getText()

        entry = ttl.TtlFileEntry()
        entry.name = full_name
        entry.property = "faculty"
        entry.picture = picture_url
        entry.title = job_title
        entry.email = email
        entry.phone = phone
        # NOTE(review): attribute is spelled with a capital "I"; confirm
        # that this is the name the TTL entry expects.
        entry.Interests = interests
        entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the faculty table at ``self._link`` into a TTL file.

    Rows come from the first ``<tbody>``.  Cell 0 may contain a picture;
    when it does not, the picture is stored as an empty string.  The text
    of cell 1 is split on newlines and read positionally (line 1: name up
    to the first comma, line 2: title, line 3: phone, line 4: e-mail).
    Cell 2 is the department.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    for row in page.find('tbody').select('tr'):
        cells = row.select('td')

        image = cells[0].find('img')
        if image:
            picture_url = self._flink + image['src']
        else:
            picture_url = ""

        lines = cells[1].getText().split("\n")
        full_name = lines[1].split(',')[0]
        job_title = lines[2]
        phone = lines[3]
        email = lines[4]
        department = cells[2].getText()

        entry = ttl.TtlFileEntry()
        entry.name = full_name
        entry.property = "faculty"
        entry.picture = picture_url
        entry.title = job_title
        entry.phone = phone
        entry.email = email
        entry.department = department
        entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the pages at ``self._link1`` and ``self._link2`` into a TTL file.

    Both pages are downloaded and parsed first; each parsed page is then
    passed, in order, to ``self._refreshFromSoup`` together with the output
    file.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    def fetch_soup(url):
        # Download one page; an HTTP error is reported but the body is
        # still parsed.
        response = requests.get(url)
        try:
            response.raise_for_status()
        except Exception as exc:
            print('There was a problem: %s' % (exc))
        return BeautifulSoup(response.text, "html.parser")

    first_page = fetch_soup(self._link1)
    second_page = fetch_soup(self._link2)

    for page in (first_page, second_page):
        self._refreshFromSoup(page, output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the faculty table at ``self._link`` into a TTL file.

    The first three ``<tr>`` rows are skipped.  For each remaining row the
    name is the first ``<h1>``, the title the first ``<p>``, the website
    the second ``<a>`` (prefixed with ``self._flink``) and the e-mail the
    text of the third ``<a>``.  The phone is the text after the first
    ``<br>`` of the third ``<td>``; unless it is whitespace-only, only the
    part after the first ``:`` is kept.  When the row has an ``<img>``, its
    ``src`` (prefixed with ``self._flink``) is stored as the picture.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    ttl_file = ttl.TtlFile(self.ttl_filename)

    webpage = requests.get(self._link)
    try:
        webpage.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    soup = BeautifulSoup(webpage.text, "html.parser")
    for row in soup.select('tr')[3:]:
        name = row.find('h1').getText()
        title = row.find('p').getText()

        anchors = row.select('a')
        website = self._flink + anchors[1]['href']
        email = anchors[2].getText()

        images = row.select('img')

        phone = row.select('td')[2].find('br').next_sibling
        if not phone.isspace():
            phone = phone.split(":")[1]

        prof = ttl.TtlFileEntry()
        prof.name = name
        prof.property = 'faculty'
        # The picture URL used to be computed and then dropped; store it,
        # but only for rows that actually have an image.
        if images:
            prof.picture = self._flink + images[0]['src']
        prof.website = website
        prof.title = title
        prof.email = email
        prof.phone = phone
        prof.write_to(ttl_file)

    ttl_file.close()
    return ttl_file
def write_ttl(self):
    """Scrape the profile stubs at ``self._link`` into a TTL file.

    Every ``<div class="user-profile-stub clearfix">`` is flattened to its
    text nodes (bare newline nodes removed) and read positionally: node 0
    is "name[, education]", node 1 the title, node 2 the department, and
    nodes 4, 6 and 8 the room, phone and e-mail.  If an
    ``Areas of Expertise`` node is present, all nodes after it are joined
    with ``", "`` as the interests.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    for stub in page.findAll('div', {"class": "user-profile-stub clearfix"}):
        fields = [text for text in stub.findAll(text=True) if text != "\n"]
        name_and_education = fields[0].split(',', 1)

        entry = ttl.TtlFileEntry()
        entry.name = name_and_education[0]
        entry.property = "faculty"
        if len(name_and_education) > 1:
            entry.education = name_and_education[1]
        entry.title = fields[1]
        entry.department = fields[2]
        # Odd positions are skipped here -- presumably field labels; verify
        # against the page markup.
        entry.room = fields[4]
        entry.phone = fields[6]
        entry.email = fields[8]
        if "Areas of Expertise" in fields:
            heading_at = fields.index("Areas of Expertise")
            entry.interests = ", ".join(fields[heading_at + 1:])
        entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the faculty listing at ``self._link`` into a TTL file.

    For every ``<div class="faculty-container">`` that contains a link, the
    link target (prefixed with ``self._flink``) is passed to
    ``self._parse_prof_site`` and the returned entry is written to the
    output file.  Containers without a link are ignored.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.
    """
    output = ttl.TtlFile(self.ttl_filename)

    response = requests.get(self._link)
    try:
        response.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    page = BeautifulSoup(response.text, "html.parser")
    for container in page.findAll('div', {"class": "faculty-container"}):
        anchor = container.find('a')
        if anchor is not None:
            href = anchor['href']
            profile_url = self._flink + href
            entry = self._parse_prof_site(profile_url)
            entry.write_to(output)

    output.close()
    return output
def write_ttl(self):
    """Scrape the directory at ``self._link`` into a TTL file.

    Every link on the page whose ``href`` contains ``/directory/`` (except
    the directory index ``/westphal/about/directory/`` itself) is followed,
    and one ``faculty`` entry is written per profile page.  On the profile
    page the ``faculty-name``, ``title``, ``contact`` and ``location`` divs
    are read; phone, e-mail and website are taken from lines 3, 4 and 5 of
    the contact text when they carry the ``PH:``, ``Email:`` and
    ``Website:`` labels, and the office from line 2 of the location text.

    Optional fields (title, phone, e-mail, website, room) are only set on
    the entry when they were found on that profile, so a value can never
    carry over from a previously processed profile.

    Returns:
        The ``ttl.TtlFile`` built from ``self.ttl_filename``, after
        ``close()`` has been called on it.

    Raises:
        IndexError: if a profile's contact block has fewer lines than the
            positions read from it.
    """
    ttl_file = ttl.TtlFile(self.ttl_filename)

    webpage = requests.get(self._link)
    try:
        webpage.raise_for_status()
    except Exception as exc:
        # An HTTP error is only reported; the returned body is still parsed.
        print('There was a problem: %s' % (exc))

    soup = BeautifulSoup(webpage.text, "html.parser")
    # href=True skips anchors that have no href attribute, which would
    # otherwise raise KeyError on lookup.
    for anchor in soup.find_all('a', href=True):
        href = anchor["href"]
        if "/directory/" not in href or href == "/westphal/about/directory/":
            continue

        fpage = requests.get(self._flink + href)
        try:
            fpage.raise_for_status()
        except Exception as exc:
            print('There was a problem: %s' % (exc))
        fsoup = BeautifulSoup(fpage.text, "html.parser")

        name = fsoup.find('div', {"class": "faculty-name"}).getText()
        title = fsoup.find('div', {"class": "title"}).getText()
        contact_lines = fsoup.find('div', {"class": "contact"}).getText().splitlines()

        # Reset for every profile: None means "not present on this page".
        phone = email = website = office = None

        if "PH:" in contact_lines[3]:
            phone = contact_lines[3].split(": ")[1].replace(".", "")
        if "Email:" in contact_lines[4]:
            email = contact_lines[4].split(": ")[1]
        if "Website:" in contact_lines[5]:
            website = contact_lines[5].split(": ")[1]
            if not website:
                # The label line is empty, so the URL is on the next line.
                website = contact_lines[6].strip()

        location_lines = fsoup.find('div', {"class": "location"}).getText().splitlines()
        if len(location_lines) > 2 and location_lines[2]:
            office = location_lines[2]

        prof = ttl.TtlFileEntry()
        prof.name = name.split(',', 1)[0]
        prof.property = "faculty"
        if title:
            prof.title = title
        if phone is not None:
            prof.phone = phone
        if email is not None:
            prof.email = email
        if website is not None:
            prof.website = website
        if office is not None:
            prof.room = office
        prof.write_to(ttl_file)

    ttl_file.close()
    return ttl_file