Example #1
0
    def write_ttl(self):
        """Scrape the faculty listing at ``self._link`` into a TTL file.

        Every ``<tr class="FacultyTableRow">`` supplies an image ``src`` and a
        link ``href``; both are prefixed with ``self._flink``. The linked page
        is handed to ``self._parse_prof_site`` to build the entry, which then
        receives the picture and website URLs and is written out.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        output = ttl.TtlFile(self.ttl_filename)

        response = requests.get(self._link)
        try:
            response.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        page = BeautifulSoup(response.text, "html.parser")

        for row in page.find_all('tr', {"class": "FacultyTableRow"}):
            picture_path = row.select('img')[0]['src']
            profile_path = row.select('a')[0]['href']
            picture_url = self._flink + picture_path
            profile_url = self._flink + profile_path

            entry = self._parse_prof_site(profile_url)
            entry.picture = picture_url
            entry.website = profile_url
            entry.write_to(output)

        output.close()
        return output
Example #2
0
    def write_ttl(self):
        """Scrape the faculty table at ``self._link`` into a TTL file.

        Every ``<tr>`` from the third one onward is treated as one faculty
        member: the name comes from the ``<strong>`` tag, the title from the
        node following the first ``<br>``, email/phone/room from the first
        three lines of the third ``<p>``, and interests from the fourth
        ``<p>``.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        ttl_file = ttl.TtlFile(self.ttl_filename)

        webpage = requests.get(self._link)
        try:
            webpage.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        soup = BeautifulSoup(webpage.text, "html.parser")

        # The first two rows are skipped.
        for row in soup.select('tr')[2:]:
            paragraphs = row.select('p')
            contact_info = paragraphs[2].getText().split('\n')

            prof = ttl.TtlFileEntry()
            prof.name = row.find('strong').getText()
            prof.property = "faculty"
            prof.title = row.find('br').next_sibling
            prof.email = contact_info[0]
            prof.phone = contact_info[1]
            prof.room = contact_info[2]
            # Lowercase attribute name, like every other entry field.
            prof.interests = paragraphs[3].getText()

            prof.write_to(ttl_file)

        ttl_file.close()

        return ttl_file
Example #3
0
    def write_ttl(self):
        """Scrape the faculty table at ``self._link`` into a TTL file.

        Each ``<tr>`` after the first is read through its ``<div>`` children:
        the first holds the picture, the second the name and profile link,
        and the following ones title, department, interests, email, phone
        and room. The profile link (prefixed with ``self._flink``) is passed
        to ``self._parse_prof_site`` to create the entry, which is then
        filled in from the row and written out.

        Interests and email are read as ``label: value``. Phone and room are
        optional and are only set when their ``<div>`` text contains a colon.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """

        def value_after_label(text):
            # Split on the first colon only so a value that itself contains
            # a colon is kept whole.
            return text.split(':', 1)[1]

        ttl_file = ttl.TtlFile(self.ttl_filename)

        webpage = requests.get(self._link)
        try:
            webpage.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        soup = BeautifulSoup(webpage.text, "html.parser")

        # The first row is skipped.
        for row in soup.select('tr')[1:]:
            divs = row.select('div')
            img_src = divs[0].select('img')[0]['src']
            a_href = divs[1].select('a')[0]['href']
            prof_link = self._flink + a_href
            data = [div.getText() for div in divs]

            prof = self._parse_prof_site(prof_link)
            prof.picture = self._flink + img_src
            prof.name = data[1]
            prof.property = "faculty"
            prof.title = data[2]
            prof.department = data[3]
            prof.interests = value_after_label(data[4])
            prof.email = value_after_label(data[5])
            # Checking for the colon also covers empty and whitespace-only
            # text; "".isspace() is False, so an isspace() test alone would
            # let an empty string through to the split.
            if ':' in data[6]:
                prof.phone = value_after_label(data[6])
            if ':' in data[7]:
                prof.room = value_after_label(data[7])

            prof.write_to(ttl_file)

        ttl_file.close()
        return ttl_file
Example #4
0
    def write_ttl(self):
        """Scrape the faculty tables at ``self._link`` into a TTL file.

        Every ``<article>`` except the first is scanned, and within each one
        every ``<tr>`` except the first. A row's first three ``<td>`` cells
        supply the name, the title/department pair (before and after the
        cell's first ``<br>``) and the email/phone pair (likewise split by
        the first ``<br>``).

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        output = ttl.TtlFile(self.ttl_filename)

        response = requests.get(self._link)
        try:
            response.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        page = BeautifulSoup(response.text, "html.parser")

        for article in page.select('article')[1:]:
            for row in article.select('tr')[1:]:
                cells = row.select('td')

                entry = ttl.TtlFileEntry()
                entry.name = cells[0].getText()
                entry.property = "faculty"

                position_break = cells[1].select('br')[0]
                entry.title = position_break.previous_sibling
                entry.department = position_break.next_sibling

                contact_break = cells[2].select('br')[0]
                entry.email = contact_break.previous_sibling.getText()
                entry.phone = contact_break.next_sibling

                entry.write_to(output)

        output.close()
        return output
Example #5
0
    def write_ttl(self):
        """Scrape the faculty table at ``self._link`` into a TTL file.

        Every ``<tr>`` is treated as one faculty member. The picture URL is
        the row's first ``<img>`` ``src`` prefixed with ``self._flink``. The
        row text, stripped and split on newlines, supplies name, title,
        phone and email in that order; phone and email are only set when
        their line is not blank.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        ttl_file = ttl.TtlFile(self.ttl_filename)

        webpage = requests.get(self._link)
        try:
            webpage.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        soup = BeautifulSoup(webpage.text, "html.parser")

        for row in soup.select('tr'):
            img_src = row.select('img')[0]['src']
            lines = row.getText().strip().split('\n')

            prof = ttl.TtlFileEntry()
            prof.picture = self._flink + img_src
            prof.property = "faculty"
            prof.name = lines[0]
            prof.title = lines[1]
            # strip() treats both "" and whitespace-only lines as blank;
            # isspace() alone reports False for the empty string.
            if lines[2].strip():
                prof.phone = lines[2]
            if lines[3].strip():
                prof.email = lines[3]

            prof.write_to(ttl_file)

        ttl_file.close()
        return ttl_file
Example #6
0
    def write_ttl(self):
        """Scrape the faculty table at ``self._link`` into a TTL file.

        The first two and the last fourteen ``<tr>`` rows are skipped. For
        each remaining row the name comes from ``<strong>``, the title from
        the node after the first ``<br>``, the contact details from the
        lines of the second ``<td>`` (line 1 email, line 2 phone, line 3
        room, each optional), the interests from the third ``<td>`` and the
        picture from the first ``<img>`` (prefixed with ``self._flink``).

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        ttl_file = ttl.TtlFile(self.ttl_filename)

        webpage = requests.get(self._link)
        try:
            webpage.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        soup = BeautifulSoup(webpage.text, "html.parser")

        elems = soup.select('tr')
        for i in range(2, len(elems) - 14):
            row = elems[i]
            cells = row.select('td')

            prof = ttl.TtlFileEntry()

            nameStr = row.find('strong').getText().strip()
            titleStr = row.br.next_sibling.strip()

            contactList = cells[1].getText().splitlines()
            # Every contact line is optional, so each index is length-checked
            # before it is read.
            if len(contactList) > 1 and contactList[1]:
                prof.email = contactList[1].strip()
            if len(contactList) > 2 and contactList[2]:
                prof.phone = contactList[2].strip()
            if len(contactList) > 3 and contactList[3]:
                prof.room = contactList[3].strip()

            interestsStr = cells[2].getText().strip()
            img_src = row.select('img')[0]['src'].strip()
            pictureStr = self._flink + img_src

            prof.name = nameStr
            prof.property = "faculty"
            prof.title = titleStr
            prof.interests = interestsStr
            prof.picture = pictureStr

            prof.write_to(ttl_file)

        ttl_file.close()

        return ttl_file
    def write_ttl(self):
        """Convert the building CSV at ``self.csv_filename`` into a TTL file.

        Each CSV row becomes one ``building`` entry. The ``Name`` column is
        split on ``;``: the first part is the primary name and the rest,
        together with whatever ``self.filter_common_names`` returns for the
        list, become alternative names. A non-empty ``Code`` is added to the
        alternative names both as given and title-cased. The ``Function``,
        ``Address``, ``Picture`` and ``Website`` columns and the per-day
        start/end time columns are copied onto the entry.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        # Attribute prefixes per weekday; the CSV column uses the same prefix
        # with its first letter upper-cased (e.g. "th" -> "ThStartTime").
        day_prefixes = ("m", "t", "w", "th", "f", "sa", "su")

        # newline='' lets the csv module handle line endings itself, as its
        # documentation requires for file objects passed to a reader.
        with open(self.csv_filename, 'r', newline='') as f:
            ttl_file = ttl.TtlFile(self.ttl_filename)
            reader = csv.DictReader(f)
            for row in reader:
                building = ttl.TtlFileEntry()
                building.property = "building"

                names = row["Name"].split(';')
                names = names + self.filter_common_names(names)

                building.name = names[0]
                building.altnames = names[1:]
                code = row["Code"]
                if code:
                    building.altnames.append(code)
                    building.altnames.append(code.title())

                building.department = row["Function"]
                building.address = row["Address"]
                building.picture = row["Picture"]
                building.website = row["Website"]

                # All start times first, then all end times.
                for suffix in ("StartTime", "EndTime"):
                    for day in day_prefixes:
                        column = day[0].upper() + day[1:] + suffix
                        setattr(building, day + suffix, row[column])

                building.write_to(ttl_file)

            ttl_file.close()
            return ttl_file
Example #8
0
    def write_ttl(self):
        """Scrape the faculty table at ``self._link`` into a TTL file.

        The first two and the last six ``<tr>`` rows are skipped. For each
        remaining row the name comes from ``<strong>``, the title from the
        node after the first ``<br>``, and email, phone, room and interests
        from the third to sixth ``<p>`` tags respectively.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        ttl_file = ttl.TtlFile(self.ttl_filename)

        webpage = requests.get(self._link)
        try:
            webpage.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        soup = BeautifulSoup(webpage.text, "html.parser")

        elems = soup.select('tr')
        for i in range(2, len(elems) - 6):
            row = elems[i]
            paragraphs = row.select('p')

            prof = ttl.TtlFileEntry()

            prof.name = row.find('strong').getText()
            prof.property = "faculty"
            prof.title = row.br.next_sibling.strip()
            prof.email = paragraphs[2].getText()
            prof.phone = paragraphs[3].getText()
            prof.room = paragraphs[4].getText()
            # Lowercase attribute name, like every other entry field.
            prof.interests = paragraphs[5].getText().strip()

            prof.write_to(ttl_file)

        ttl_file.close()

        return ttl_file
Example #9
0
    def write_ttl(self):
        """Scrape the faculty listing at ``self._link`` into a TTL file.

        Every ``<tr class="FacultyTableRow">`` is one faculty member. The
        ``h2.fname`` text is split at the first comma into name and (when
        present) degree. Title and location come from the children of the
        ``div.fcontact`` block, the location text being split by line into
        office, email and phone. The department is the first node inside the
        row's second ``<td>``. A picture is set only when the headshot block
        contains an ``<img>``.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        output = ttl.TtlFile(self.ttl_filename)

        response = requests.get(self._link)
        try:
            response.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        page = BeautifulSoup(response.text, "html.parser")

        for row in page.find_all('tr', {"class": "FacultyTableRow"}):
            headshot = row.find('div', {"class": "facultyHeadshot"}).img
            info = row.find('div', {"class": "fac-info"})
            name_parts = info.find('h2', {"class": "fname"}).getText()
            name_parts = name_parts.split(",", 1)
            contact = info.find('div', {"class": "fcontact"})
            location = contact.contents[3].getText().split("\n")

            entry = ttl.TtlFileEntry()

            if headshot is not None:
                entry.picture = "http://drexel.edu" + headshot['src']
            entry.name = name_parts[0]
            entry.property = "faculty"
            if len(name_parts) > 1:
                entry.degree = name_parts[1]
            entry.title = contact.contents[1].getText()
            entry.office = location[0]
            entry.email = location[1]
            entry.phone = location[2]
            entry.department = row.find_all('td')[1].next_element

            entry.write_to(output)

        output.close()
        return output
    def write_ttl(self):
        """Scrape the faculty table at ``self._link`` into a TTL file.

        Only the first ``<tbody>`` is read, and its first two ``<tr>`` rows
        are skipped. In each remaining row the first ``<td>`` supplies the
        picture (``<img>`` ``src`` prefixed with ``self._flink``), name
        (``<h1>``) and title (``<h2>``); the second supplies the email
        (``<a>`` text) and the phone (text after the first ``<br>``, taken
        between the first colon and the end of that line); the third
        supplies the interests.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        ttl_file = ttl.TtlFile(self.ttl_filename)

        webpage = requests.get(self._link)
        try:
            webpage.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        soup = BeautifulSoup(webpage.text, "html.parser")

        table = soup.select('tbody')[0]
        for row in table.select('tr')[2:]:
            cells = row.select("td")

            picture = self._flink + cells[0].find('img')['src']
            nameStr = cells[0].find('h1').getText()
            titleStr = cells[0].find('h2').getText()
            emailStr = cells[1].find('a').getText()
            phoneStr = cells[1].find('br').next_sibling
            phoneStr = phoneStr.split(":")[1]
            phoneStr = phoneStr.split('\n')[0]
            interestsStr = cells[2].getText()

            prof = ttl.TtlFileEntry()

            prof.name = nameStr
            prof.property = "faculty"
            prof.picture = picture
            prof.title = titleStr
            prof.email = emailStr
            prof.phone = phoneStr
            # Lowercase attribute name, like every other entry field.
            prof.interests = interestsStr

            prof.write_to(ttl_file)

        ttl_file.close()

        return ttl_file
    def write_ttl(self):
        """Scrape the faculty table at ``self._link`` into a TTL file.

        Every ``<tr>`` of the first ``<tbody>`` is one faculty member. The
        first ``<td>`` may hold a picture, the second holds a newline
        separated block (line 1 name up to the first comma, line 2 title,
        line 3 phone, line 4 email) and the third the department. When no
        ``<img>`` is found the picture is the empty string.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        output = ttl.TtlFile(self.ttl_filename)

        response = requests.get(self._link)
        try:
            response.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        page = BeautifulSoup(response.text, "html.parser")

        for row in page.find('tbody').select('tr'):
            cells = row.select('td')

            image = cells[0].find('img')
            picture_url = self._flink + image['src'] if image else ""

            details = cells[1].getText().split("\n")
            full_name = details[1].split(',')[0]
            position = details[2]
            phone_number = details[3]
            email_address = details[4]

            dept = cells[2].getText()

            entry = ttl.TtlFileEntry()
            entry.name = full_name
            entry.property = "faculty"
            entry.picture = picture_url
            entry.title = position
            entry.phone = phone_number
            entry.email = email_address
            entry.department = dept
            entry.write_to(output)

        output.close()
        return output
Example #12
0
    def write_ttl(self):
        """Build the TTL file from the pages at ``self._link1`` and
        ``self._link2``.

        Both pages are downloaded and parsed first; each parsed page is then
        passed, in order, to ``self._refreshFromSoup`` together with the
        output file.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """

        def fetch_soup(url):
            # Download one page; an HTTP error is reported but not raised.
            response = requests.get(url)
            try:
                response.raise_for_status()
            except Exception as exc:
                print('There was a problem: %s' % (exc))
            return BeautifulSoup(response.text, "html.parser")

        output = ttl.TtlFile(self.ttl_filename)

        first_page = fetch_soup(self._link1)
        second_page = fetch_soup(self._link2)

        for page in (first_page, second_page):
            self._refreshFromSoup(page, output)

        output.close()
        return output
Example #13
0
    def write_ttl(self):
        """Scrape the faculty table at ``self._link`` into a TTL file.

        The first three ``<tr>`` rows are skipped. For each remaining row
        the name comes from ``<h1>``, the title from the first ``<p>``, the
        website from the second ``<a>`` (``href`` prefixed with
        ``self._flink``) and the email from the third ``<a>``. The picture
        is set only when the row has an ``<img>``. The phone is the text
        after the first ``<br>`` of the third ``<td>``; it is set only when
        that text is non-blank, using the part after the colon when there
        is one.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        ttl_file = ttl.TtlFile(self.ttl_filename)

        webpage = requests.get(self._link)
        try:
            webpage.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        soup = BeautifulSoup(webpage.text, "html.parser")

        for row in soup.select('tr')[3:]:
            anchors = row.select('a')

            prof = ttl.TtlFileEntry()
            prof.name = row.find('h1').getText()
            prof.property = 'faculty'
            prof.title = row.find('p').getText()
            prof.website = self._flink + anchors[1]['href']
            prof.email = anchors[2].getText()

            # Set the picture per row, so a row without an image never
            # reuses the URL computed for an earlier row.
            images = row.select('img')
            if images:
                prof.picture = self._flink + images[0]['src']

            # The node after the <br> may be missing or may be another tag;
            # only real, non-blank text is treated as a phone number.
            phone_text = row.select('td')[2].find('br').next_sibling
            if isinstance(phone_text, str) and phone_text.strip():
                if ":" in phone_text:
                    prof.phone = phone_text.split(":")[1]
                else:
                    prof.phone = phone_text

            prof.write_to(ttl_file)

        ttl_file.close()
        return ttl_file
Example #14
0
    def write_ttl(self):
        """Scrape the profile stubs at ``self._link`` into a TTL file.

        Every ``<div class="user-profile-stub clearfix">`` is one faculty
        member. Its text nodes, minus those that are exactly a newline, are
        read by position: 0 is the name (optionally followed by a comma and
        the education), 1 the title, 2 the department, 4 the room, 6 the
        phone and 8 the email. When an "Areas of Expertise" node exists, all
        nodes after it are joined with ", " as the interests.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        output = ttl.TtlFile(self.ttl_filename)

        response = requests.get(self._link)
        try:
            response.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        page = BeautifulSoup(response.text, "html.parser")

        stubs = page.find_all('div', {"class": "user-profile-stub clearfix"})
        for stub in stubs:
            fields = [t for t in stub.find_all(text=True) if t != "\n"]
            name_and_education = fields[0].split(',', 1)

            entry = ttl.TtlFileEntry()

            entry.name = name_and_education[0]
            entry.property = "faculty"
            if len(name_and_education) > 1:
                entry.education = name_and_education[1]
            entry.title = fields[1]
            entry.department = fields[2]
            entry.room = fields[4]
            entry.phone = fields[6]
            entry.email = fields[8]
            if "Areas of Expertise" in fields:
                start = fields.index("Areas of Expertise") + 1
                entry.interests = ", ".join(fields[start:])

            entry.write_to(output)

        output.close()
        return output
Example #15
0
    def write_ttl(self):
        """Scrape the faculty listing at ``self._link`` into a TTL file.

        For every ``<div class="faculty-container">`` that contains a link,
        the link's ``href`` (prefixed with ``self._flink``) is passed to
        ``self._parse_prof_site`` and the resulting entry is written out.
        Containers without a link are ignored.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """
        output = ttl.TtlFile(self.ttl_filename)

        response = requests.get(self._link)
        try:
            response.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        page = BeautifulSoup(response.text, "html.parser")

        for container in page.find_all('div', {"class": "faculty-container"}):
            anchor = container.find('a')
            if anchor is not None:
                profile_url = self._flink + anchor['href']
                entry = self._parse_prof_site(profile_url)
                entry.write_to(output)

        output.close()
        return output
    def write_ttl(self):
        """Scrape the directory at ``self._link`` into a TTL file.

        Every ``<a>`` whose ``href`` contains ``/directory/`` (other than the
        directory index ``/westphal/about/directory/`` itself) is followed,
        with ``self._flink`` prefixed. On each profile page the name is the
        ``div.faculty-name`` text up to the first comma and the title is the
        ``div.title`` text. Phone, email and website are read from fixed
        lines of the ``div.contact`` text when those lines carry the
        ``PH:``, ``Email:`` and ``Website:`` labels, and the room from the
        third line of the ``div.location`` text.

        Optional fields that are not found on a page are left unset on that
        page's entry.

        Returns:
            The ``ttl.TtlFile`` for ``self.ttl_filename``, already closed.
        """

        def line_at(lines, index):
            # A position past the end reads as an empty line, not an error.
            return lines[index] if index < len(lines) else ""

        ttl_file = ttl.TtlFile(self.ttl_filename)

        webpage = requests.get(self._link)
        try:
            webpage.raise_for_status()
        except Exception as exc:
            # An HTTP error is only reported; parsing still goes ahead.
            print('There was a problem: %s' % (exc))
        soup = BeautifulSoup(webpage.text, "html.parser")

        for anchor in soup.findAll('a'):
            # .get() returns None for anchors that have no href attribute.
            href = anchor.get("href")
            if href is None or "/directory/" not in href:
                continue
            if href == "/westphal/about/directory/":
                continue

            fpage = requests.get(self._flink + href)
            try:
                fpage.raise_for_status()
            except Exception as exc:
                print('There was a problem: %s' % (exc))
            fsoup = BeautifulSoup(fpage.text, "html.parser")

            # Every optional field is assigned straight onto this page's
            # entry, so nothing carries over from the previous profile.
            prof = ttl.TtlFileEntry()

            nameStr = fsoup.find('div', {"class": "faculty-name"}).getText()
            prof.name = nameStr.split(',', 1)[0]
            prof.property = "faculty"

            titleStr = fsoup.find('div', {"class": "title"}).getText()
            if titleStr:
                prof.title = titleStr

            contactStr = fsoup.find('div', {"class": "contact"}).getText()
            contactList = contactStr.splitlines()

            phone_line = line_at(contactList, 3)
            if "PH:" in phone_line:
                prof.phone = phone_line.partition(": ")[2].replace(".", "")

            email_line = line_at(contactList, 4)
            if "Email:" in email_line:
                prof.email = email_line.partition(": ")[2]

            website_line = line_at(contactList, 5)
            if "Website:" in website_line:
                websiteStr = website_line.partition(": ")[2]
                if not websiteStr:
                    # Nothing after the label: use the following line.
                    websiteStr = line_at(contactList, 6).strip()
                prof.website = websiteStr

            locationStr = fsoup.find('div', {"class": "location"}).getText()
            locationList = locationStr.splitlines()
            if len(locationList) > 2 and locationList[2]:
                prof.room = locationList[2]

            prof.write_to(ttl_file)

        ttl_file.close()
        return ttl_file