# socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", 9050)
# socket.socket = socks.socksocket
# session = requesocks.session()
# session.proxies = {'http': 'socks5://localhost:9050', 'https': 'socks5://localhost:9050'}
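# Uncommenting the lines above would route all socket traffic through a local
# SOCKS5 proxy (e.g., Tor listening on 9050) by monkey-patching socket.socket
# via PySocks, or give requesocks a SOCKS-aware session to the same effect.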


# Shared imports for the scraper snippets below.
import concurrent.futures
import itertools
import json
import linecache
import os
import sys
import time
from functools import partial
from math import ceil

import pandas as pd
from pyquery import PyQuery as pq

# compile_csvs, get_last_names, and get_character_permutations are assumed to
# live in a local helper module alongside these scripts.
from utils import compile_csvs, get_character_permutations, get_last_names

folder = os.path.basename(__file__).split(".")[0]

base_url = "http://www.emich.edu/directory/?page={}&first-name=First+Name&last-name={}&student=on&x=0&y=0"

headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36"}

def get_name(name):
	filename = "{}/{}.csv".format(folder, name)
	if not os.path.exists(filename):
		print name
		# Fetch page 1 first; the second .pagination element holds the total
		# hit count, from which the page count follows (10 hits per page).
		doc = pq(base_url.format(1, name), headers=headers)
		num_results = int(doc(".pagination").eq(1).text().split(" ")[2])
		num_pages = int(ceil(num_results / 10.0))
		def get_page(page, name=""):
			rows = pq(base_url.format(page, name), headers=headers)("table tr:not(.details):gt(0)")
			return [{"Name": "{}, {}".format(pq(x).find("td").eq(1).text().encode("utf-8"),
			                                 pq(x).find("td").eq(2).text().encode("utf-8")),
			         "Email": pq(x).find("a[href^=mailto]").text().encode("utf-8")}
			        for x in rows]
		with concurrent.futures.ThreadPoolExecutor(max_workers=num_pages) as thread:
			dat = list(itertools.chain(*thread.map(partial(get_page, name=name), range(1, num_pages + 1))))
			pd.DataFrame(data=dat, columns=["Name", "Email"]).to_csv(filename, index=False)
			return dat
	else:
		print "{} Skipped".format(name)
names = get_last_names(40000)
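# Throttle: submit the 40,000 surnames in batches of 50 and pause briefly
# between batches rather than queueing everything at once.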
with concurrent.futures.ThreadPoolExecutor(max_workers=50) as thread:
	for i in range(0,40000,50):
		thread.map(get_name,names[i:i+50])
		time.sleep(.1)
compile_csvs(folder)
def print_exception():
	exc_type, exc_obj, exc_tb = sys.exc_info()
	f, lineno, filename = exc_tb.tb_frame, exc_tb.tb_lineno, exc_tb.tb_frame.f_code.co_filename
	linecache.checkcache(filename)
	line = linecache.getline(filename, lineno, f.f_globals)
	print 'EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj)
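# Usage sketch: call from an except block to report where a lookup failed, e.g.
#   try:
#       get_name(name)
#   except Exception:
#       print_exception()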

folder = os.path.basename(__file__).split(".")[0]
base_url = "http://www.slu.edu/peoplefinder/json/json_index.php"

def get_name(name):
	filename = "{}/{}.csv".format(folder, name)
	if not os.path.exists(filename):
		print name
		data = {"q": name}  # POST payload: the last name to search for
		def get_data():
			dat = json.loads(pq(base_url, data=data, method="post")("p").text()).get("resultSet").get("result")
			# Every field comes back as a single-element list; keep students only.
			return [{"Name": x.get("fullname")[0],
			         "Email": x.get("email")[0] if x.get("email") else None}
			        for x in dat if x.get("affiliation")[0].lower() == "student"]
		try:
			dat = get_data()
			pd.DataFrame(data=dat, columns=["Name", "Email"]).to_csv(filename, index=False)
		except Exception:
			# Retry the whole lookup on any failure (e.g., a malformed response).
			return get_name(name)
	else:
		print "{} Skipped".format(name)
with concurrent.futures.ThreadPoolExecutor(max_workers=50) as thread:
	thread.map(get_name,get_last_names(2500))

print compile_csvs(folder)




print("--- %s seconds ---" % (time.time() - start_time))
            jq = pq(x.get_attribute("outerHTML"))
            over = len(jq(".partialResults"))
            dat = [{"Name": pq(x).text().strip()} for x in jq(".resultsList li a p.name")]
            if over and layer == 0:
                # list() here so the chained iterator is not exhausted by the
                # DataFrame write below before it is returned.
                dat = list(itertools.chain(
                    *[
                        get_data(browser=browser, major=major, first_name=x, layer=layer + 1)
                        for x in get_character_permutations(num_characters=1)
                    ]
                ))
            pd.DataFrame(data=dat, columns=["Name"]).to_csv(filename, index=False)
            return dat
        else:
            print "Skipped {} {}".format(major, first_name)
            return []
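    # Walk every major, flatten the per-major name lists, then merge the
    # per-major CSVs, de-duplicating on Name.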

    try:
        dat = list(itertools.chain(*[get_data(major=i) for i in majors]))
        browser.close()
        compile_csvs("clemson", duplicate="Name")
        print len(pd.read_csv("clemson.csv"))
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print (exc_type, fname, exc_tb.tb_lineno)
        print e
        time.sleep(10)
        browser.close()
    compile_csvs("clemson", duplicate="Name")
    print "{} students found".format(len(pd.read_csv("clemson.csv")))