#Get IIT Madras Faculty Page
logging.basicConfig(filename='bombay_cs_faculty_list.log', level=logging.DEBUG)
logging.debug("Reading Bombay faculty list page")
try:
    request = urllib2.urlopen("http://www.cse.iitb.ac.in/page14")
    page = request.read().encode('utf8')
except Exception, e:
    print "Failed to read faculty webpage : %s" % e

logging.debug("Extract individual faculty details")
soup = BeautifulSoup(page)

#Get institute id
conn = Connection()
instituteID = conn.getInstituteID("Bombay")

tables = soup.find("table").findAll("table")[1:]

# Reset the per-faculty record fields before scraping each faculty table.
# NOTE(review): this loop body is truncated in this chunk — the code that
# populates these fields (and the body of the trailing `try:`) is missing.
for table in tables:
    faculty_page_link = ""
    name = ""
    dept = ""
    designation = ""
    email = ""
    photo_href = ""
    research_field = []
    publications = []
    
    try:
#User Library
from database.mysql.connection import Connection
from common.faculty import Faculty

#Get IIT Delhi Electrical Faculty Page
# Fetch the IIT Delhi EE department's complete faculty listing and parse it
# with BeautifulSoup; the truncated loop below iterates the result tables.
logging.basicConfig(filename='delhi_Electrical_faculty_list.log', level=logging.DEBUG)
logging.debug("Reading Delhi Electrical faculty list page")
try:
    request = urllib2.urlopen("http://ee.iitd.ernet.in/people/complete_fac.html")
    page = request.read()
except Exception, e:
    # NOTE(review): on failure `page` stays undefined, so BeautifulSoup(page)
    # below raises NameError — consider exiting here.
    print "Failed to read faculty webpage : %s" % e


#Get institute id
conn = Connection()
instituteID = conn.getInstituteID("Delhi")  # DB lookup; assumes a "Delhi" row exists — TODO confirm

logging.debug("Extract individual faculty details")
soup = BeautifulSoup(page)

# The faculty tables all live inside the page's <div id="main"> container.
tables = soup.find("div", attrs={"id": "main"}).findAll("table")

# Reset the per-faculty record fields before scraping each faculty table.
# NOTE(review): this loop body is truncated in this chunk — the code that
# populates these fields is not visible here.
for table in tables:
    faculty_page_link = ""
    name = ""
    dept = ""
    designation = ""
    email = ""
    photo_href = ""
    research_field = []
# Example 3 ("Beispiel #3" — navigation artifact from the code-example
# aggregator this file was copied from; the stray "0" vote-count line was
# part of the same artifact)
#User Library
from database.mysql.connection import Connection
from common.faculty import Faculty

#Get IIT Guwahati civil Faculty Page
# Fetch the IIT Guwahati Civil department's faculty page and parse it with
# BeautifulSoup; the truncated loop below iterates the faculty table rows.
logging.basicConfig(filename='guwahati_civil_faculty_list.log', level=logging.DEBUG)
logging.debug("Reading Guwahati civil faculty list page")
try:
    request = urllib2.urlopen("http://www.iitg.ac.in/civil/fac.htm")
    page = request.read()
except Exception, e:
    # NOTE(review): on failure `page` stays undefined, so BeautifulSoup(page)
    # below raises NameError — consider exiting here.
    print "Failed to read faculty webpage : %s" % e


#Get institute id
conn = Connection()
instituteID = conn.getInstituteID("Guwahati")  # DB lookup; assumes a "Guwahati" row exists — TODO confirm

logging.debug("Extract individual faculty details")
soup = BeautifulSoup(page)

# Faculty rows live in the table with id="fac_names".
rows = soup.find("table", id='fac_names').findAll("tr")

# rows[1:] skips the table's header row; reset the per-faculty record fields
# before scraping each data row.
# NOTE(review): this loop body is truncated in this chunk — the code that
# populates these fields is not visible here.
for row in rows[1:]:
    faculty_page_link = ""
    name = ""
    dept = ""
    designation = ""
    email = ""
    photo_href = ""
    research_field = []
from common.faculty import Faculty

#Get IIT Madras Faculty Page
logging.basicConfig(filename='madras_faculty_list.log', level=logging.DEBUG)
logging.debug("Reading madras faculty list page")
try:    
    request = urllib2.urlopen("http://www.iitm.ac.in/fsportal/iitmsite/listfaculty")
    page = request.read().encode('utf8')
except Exception, e:
    print "Failed to read faculty list webpage : %s" %e
    logging.debug("Failed to read faculty list webpage")

logging.debug("Finished reading faculty list page")

#Get institute id
conn = Connection()
instituteID = conn.getInstituteID("Madras")

#Get faculty pages
logging.debug("Get faculty list")
soup = BeautifulSoup(page)
rows = soup.find("table").find("tbody").findAll("tr")


'''
For debug
f = open('madras_faculties.csv', 'w')
'''

# Each <tr> is one faculty member; collect its <td> cells for parsing.
# NOTE(review): this loop body is truncated in this chunk — the code that
# consumes `cells` is not visible here.
for row in rows:
    cells = row.findAll("td")