# NOTE(review): this line is a whitespace-collapsed fragment of a Python 2
# scraper script (`except Exception, e`, `print` statements, urllib2). It is
# TRUNCATED here: it ends with a bare `try:` whose body is not visible, so the
# code is kept byte-identical below; only comments were changed.
#
# Visible behavior: configure logging to 'bombay_cs_faculty_list.log', fetch
# http://www.cse.iitb.ac.in/page14, parse it with BeautifulSoup, look up the
# institute id for "Bombay" via the project's database Connection, then iterate
# the nested <table> elements (skipping the first) initialising per-faculty
# fields (name, dept, designation, email, photo, research areas, publications).
#
# FIX (comment only): the original header comment said "IIT Madras", but the
# URL, log filename and getInstituteID("Bombay") all show this is IIT Bombay.
#
# HACK: `request.read().encode('utf8')` encodes an already-byte `str` in
# Python 2 — presumably works only because the page is ASCII-compatible;
# TODO confirm and decode/encode at the I/O boundary properly.
#Get IIT Bombay Faculty Page logging.basicConfig(filename='bombay_cs_faculty_list.log', level=logging.DEBUG) logging.debug("Reading Bombay faculty list page") try: request = urllib2.urlopen("http://www.cse.iitb.ac.in/page14") page = request.read().encode('utf8') except Exception, e: print "Failed to read faculty webpage : %s" % e logging.debug("Extract individual faculty details") soup = BeautifulSoup(page) #Get institute id conn = Connection() instituteID = conn.getInstituteID("Bombay") tables = soup.find("table").findAll("table")[1:] for table in tables: faculty_page_link = "" name = "" dept = "" designation = "" email = "" photo_href = "" research_field = [] publications = [] try:
# NOTE(review): whitespace-collapsed fragment of a Python 2 scraper script,
# TRUNCATED mid-`for`-loop (the loop body stops after the per-faculty field
# initialisations). Code kept byte-identical; comments only.
#
# Visible behavior: import the project's database Connection and Faculty
# classes, configure logging to 'delhi_Electrical_faculty_list.log', fetch
# http://ee.iitd.ernet.in/people/complete_fac.html, look up the institute id
# for "Delhi", parse the page with BeautifulSoup, and iterate every <table>
# inside <div id="main">, resetting per-faculty fields for each table.
#
# NOTE(review): a failed urlopen only prints a message — `page` would then be
# undefined and BeautifulSoup(page) would raise NameError; presumably
# acceptable for a one-off script, but verify before reuse.
#User Library from database.mysql.connection import Connection from common.faculty import Faculty #Get IIT Delhi Electrical Faculty Page logging.basicConfig(filename='delhi_Electrical_faculty_list.log', level=logging.DEBUG) logging.debug("Reading Delhi Electrical faculty list page") try: request = urllib2.urlopen("http://ee.iitd.ernet.in/people/complete_fac.html") page = request.read() except Exception, e: print "Failed to read faculty webpage : %s" % e #Get institute id conn = Connection() instituteID = conn.getInstituteID("Delhi") logging.debug("Extract individual faculty details") soup = BeautifulSoup(page) tables = soup.find("div", attrs={"id": "main"}).findAll("table") for table in tables: faculty_page_link = "" name = "" dept = "" designation = "" email = "" photo_href = "" research_field = []
# NOTE(review): whitespace-collapsed fragment of a Python 2 scraper script,
# TRUNCATED mid-`for`-loop (only the per-row field initialisations are
# visible). Code kept byte-identical; comments only.
#
# Visible behavior: import the project's database Connection and Faculty
# classes, configure logging to 'guwahati_civil_faculty_list.log', fetch
# http://www.iitg.ac.in/civil/fac.htm, look up the institute id for
# "Guwahati", parse the page with BeautifulSoup, and iterate the rows of
# <table id="fac_names">, skipping the header row (rows[1:]), resetting
# per-faculty fields for each row.
#
# NOTE(review): as in the sibling scripts, a failed urlopen leaves `page`
# undefined and the later BeautifulSoup(page) would raise NameError.
#User Library from database.mysql.connection import Connection from common.faculty import Faculty #Get IIT Guwahati civil Faculty Page logging.basicConfig(filename='guwahati_civil_faculty_list.log', level=logging.DEBUG) logging.debug("Reading Guwahati civil faculty list page") try: request = urllib2.urlopen("http://www.iitg.ac.in/civil/fac.htm") page = request.read() except Exception, e: print "Failed to read faculty webpage : %s" % e #Get institute id conn = Connection() instituteID = conn.getInstituteID("Guwahati") logging.debug("Extract individual faculty details") soup = BeautifulSoup(page) rows = soup.find("table", id='fac_names').findAll("tr") for row in rows[1:]: faculty_page_link = "" name = "" dept = "" designation = "" email = "" photo_href = "" research_field = []
# NOTE(review): whitespace-collapsed fragment of a Python 2 scraper script,
# TRUNCATED mid-`for`-loop (stops right after `cells = row.findAll("td")`).
# Code kept byte-identical; comments only.
#
# Visible behavior: import the project's Faculty class, configure logging to
# 'madras_faculty_list.log', fetch
# http://www.iitm.ac.in/fsportal/iitmsite/listfaculty, look up the institute
# id for "Madras", parse the page with BeautifulSoup, and iterate the <tr>
# rows of the first table's <tbody>, reading each row's <td> cells. A
# commented-out debug CSV writer ('madras_faculties.csv') is left in place.
#
# HACK: `request.read().encode('utf8')` encodes an already-byte `str` in
# Python 2; presumably works only for ASCII-compatible pages — TODO confirm.
# NOTE(review): on fetch failure only a message is printed/logged; `page`
# stays undefined and BeautifulSoup(page) would raise NameError.
from common.faculty import Faculty #Get IIT Madras Faculty Page logging.basicConfig(filename='madras_faculty_list.log', level=logging.DEBUG) logging.debug("Reading madras faculty list page") try: request = urllib2.urlopen("http://www.iitm.ac.in/fsportal/iitmsite/listfaculty") page = request.read().encode('utf8') except Exception, e: print "Failed to read faculty list webpage : %s" %e logging.debug("Failed to read faculty list webpage") logging.debug("Finished reading faculty list page") #Get institute id conn = Connection() instituteID = conn.getInstituteID("Madras") #Get faculty pages logging.debug("Get faculty list") soup = BeautifulSoup(page) rows = soup.find("table").find("tbody").findAll("tr") ''' For debug f = open('madras_faculties.csv', 'w') ''' for row in rows: cells = row.findAll("td")