Esempio n. 1
0
from pattern import web
from BeautifulSoup import BeautifulSoup
from ghost import Ghost
import urllib2
import PyQt4

from ghost_crawl import KuKlueCrawler

# Output file handle that the crawled data is meant to be written to
f = open('kucrawl.txt','w')

# range of new data format of kuklue
first = 32247
last = 35309

# Crawler session object; 'id' / 'pw' look like placeholder credentials — TODO confirm
a = KuKlueCrawler(id='id',pw='pw')

for i in range(first,last+1):
	# Crawl the evaluation page of the i-th lecture
	page, resource = a.main_search(lectureNum=i)
	# The values returned by main_search are overwritten here: evaluate JavaScript
	# in the crawler's ghost session to select the elements with class 'mainContent'.
	page, resource = a.ghost.evaluate("document.getElementsByClassName('mainContent');")

	# Take entry '0' of the result, read its 'outerHTML' and convert it to a
	# UTF-8 encoded byte string (the result is keyed by QString objects).
	page = unicode(page[PyQt4.QtCore.QString(u'0')][PyQt4.QtCore.QString(u'outerHTML')]).encode('utf-8')

	lec = BeautifulSoup(page)
	for e in lec.findAll('br'):		# remove the <br /> tags
		e.extract()

	# The <div> whose class attribute is 'lectureInfo box'
	lecInfo = lec.find('div',{'class':['lectureInfo box']})

	# Save each lecture's information to the file
Esempio n. 2
0
# Rating categories, in the order they are read from each evaluation entry.
_RATING_FIELDS = (
	'difficulty', 'total', 'studyTime', 'attendance', 'grade', 'achievement',
)


def _countActiveMarks(item, field):
	"""Return how many active <span> marks *item* shows for rating *field*.

	Raises AttributeError when the expected markup is missing, because
	find() returns None in that case.
	"""
	ratingBox = item.find('div', {'class': ['e ' + field]})
	return len(ratingBox.find('div', 'center').findAll('span', 'active'))


def _averageOwnRatings(me, lecContent):
	"""Add up the ratings of the parsable entries onto *me* and average them.

	Parsing stops at the first entry that lacks the rating markup (per the
	original author's note this happens when new-format data is reached).
	Returns the number of entries that were averaged; *me* is left untouched
	by the averaging step when that number is 0.
	"""
	lectureCount = 0
	for item in lecContent:
		try:
			# Read all six categories before touching *me*, so an entry that
			# fails half-way does not leave partial sums behind.
			marks = [_countActiveMarks(item, field) for field in _RATING_FIELDS]
		except AttributeError:
			break
		for field, count in zip(_RATING_FIELDS, marks):
			setattr(me, field, getattr(me, field) + count)
		lectureCount += 1

	if lectureCount:
		# float() avoids Python 2 integer truncation of the averages.
		for field in _RATING_FIELDS:
			setattr(me, field, getattr(me, field) / float(lectureCount))
	return lectureCount


def _indexRecommendable(lectureList, minTotal=2.5):
	"""Map lectureID -> first lecture in *lectureList* with total >= minTotal."""
	index = {}
	for lecture in lectureList:
		if float(lecture.total) < minTotal:
			continue
		index.setdefault(lecture.lectureID, lecture)
	return index


def view():
	"""Recommend lectures based on the logged-in user's own evaluations.

	Reads 'userId' and 'userPw' from request.form, crawls the user's
	evaluation page, averages the user's ratings into a Person, asks
	topMatches() for similar people and collects the lectures of those
	people whose total score is at least 2.5.

	Returns the rendered 'ShowLectures.html' template with ``classList``
	ordered by similarity * total / 5, highest first.  An empty list is
	rendered when the user has no evaluation that can be parsed.
	"""
	# NOTE(security): pickle.load can execute arbitrary code; objectData.pkl
	# must only ever be produced by this application.
	with open('objectData.pkl', 'rb') as dataFile:
		lectureList = pickle.load(dataFile)
		personList = pickle.load(dataFile)

	# Fetch the credentials submitted with the form
	userId = request.form['userId']
	userPw = request.form['userPw']

	# Crawl the user's evaluation page through the ghost session
	a = KuKlueCrawler(id=userId, pw=userPw)
	a.openPage('http://klue.kr/myLectureEval.php')
	page, resource = a.ghost.evaluate("document.getElementsByClassName('mainContent');")
	page = unicode(page[PyQt4.QtCore.QString(u'0')][PyQt4.QtCore.QString(u'outerHTML')]).encode('utf-8')

	lec = BeautifulSoup(page)

	evalList = lec.find('div', {'class': ['lectureEvalList']})
	if evalList is None:
		lecContent = []
	else:
		lecContent = evalList.findAll('div', {'class': ['content']})
	if not lecContent:
		# Nothing to build a profile from; avoid indexing an empty list.
		return render_template('ShowLectures.html', classList=[])

	# Build the Person object describing the current user
	me = Person()
	me.nickName = lecContent[0].find('div', {'class': ['wrInfo']}).a.text.encode('utf-8')

	# Average the scores the user gave in his/her own evaluations
	if not _averageOwnRatings(me, lecContent):
		# No parsable evaluation: there is no profile to match against.
		return render_template('ShowLectures.html', classList=[])

	# Run the similarity algorithm; each result is (similarity, person)
	result = topMatches(personList, me)

	classList = []

	# Sums of the matched lectures' scores, used for the error report
	totalCount = 0
	expected = dict.fromkeys(_RATING_FIELDS, 0.0)

	# Collect the lectures taken by the similar people
	lectureById = _indexRecommendable(lectureList)
	for person in result:
		similarity = person[0]
		for lectureID in person[1].lectureList:
			oneLecture = lectureById.get(lectureID)
			if oneLecture is None:
				continue
			for field in _RATING_FIELDS:
				expected[field] += float(getattr(oneLecture, field))
			totalCount += 1
			classList.append((similarity * float(oneLecture.total) / 5, oneLecture))

	if totalCount:
		# Difference between the user's averages and the matched lectures'
		# averages; skipped when nothing matched to avoid dividing by zero.
		mean = dict((field, expected[field] / totalCount) for field in _RATING_FIELDS)
		print("")
		print("Total Eval Error : " + str(me.total - mean['total']))
		print("difficulty Error : " + str(me.difficulty - mean['difficulty']))
		print("studyTime Error : " + str(me.studyTime - mean['studyTime']))
		print("attendance Error : " + str(me.attendance - mean['attendance']))
		print("grade Error : " + str(me.grade - mean['grade']))
		print("achievement Error " + str(me.achievement - mean['achievement']))
		print("")

	# Sort on the score only, so ties never fall back to comparing lecture objects
	classList.sort(key=lambda scored: scored[0], reverse=True)
	classList = [scored[1] for scored in classList]	# drop the similarity*total score

	# page rendering
	return render_template('ShowLectures.html', classList=classList)
Esempio n. 3
0
from pattern import web
from BeautifulSoup import BeautifulSoup
from ghost import Ghost
import urllib2
import PyQt4

from ghost_crawl import KuKlueCrawler

# Output file handle that the crawled data is meant to be written to
f = open('kucrawl.txt', 'w')

# range of new data format of kuklue
first = 32247
last = 35309

# Crawler session object; 'id' / 'pw' look like placeholder credentials — TODO confirm
a = KuKlueCrawler(id='id', pw='pw')

for i in range(first, last + 1):
    # Crawl the evaluation page of the i-th lecture
    page, resource = a.main_search(lectureNum=i)
    # The values returned by main_search are overwritten here: evaluate
    # JavaScript in the crawler's ghost session to select the elements with
    # class 'mainContent'.
    page, resource = a.ghost.evaluate(
        "document.getElementsByClassName('mainContent');")

    # Take entry '0' of the result, read its 'outerHTML' and convert it to a
    # UTF-8 encoded byte string (the result is keyed by QString objects).
    page = unicode(page[PyQt4.QtCore.QString(u'0')][PyQt4.QtCore.QString(
        u'outerHTML')]).encode('utf-8')

    lec = BeautifulSoup(page)
    for e in lec.findAll('br'):  # remove the <br /> tags
        e.extract()

    # The <div> whose class attribute is 'lectureInfo box'
    lecInfo = lec.find('div', {'class': ['lectureInfo box']})
Esempio n. 4
0
# Rating categories, in the order they are read from each evaluation entry.
_RATING_FIELDS = (
    'difficulty', 'total', 'studyTime', 'attendance', 'grade', 'achievement',
)


def _countActiveMarks(item, field):
    """Return how many active <span> marks *item* shows for rating *field*.

    Raises AttributeError when the expected markup is missing, because
    find() returns None in that case.
    """
    ratingBox = item.find('div', {'class': ['e ' + field]})
    return len(ratingBox.find('div', 'center').findAll('span', 'active'))


def _averageOwnRatings(me, lecContent):
    """Add up the ratings of the parsable entries onto *me* and average them.

    Parsing stops at the first entry that lacks the rating markup (per the
    original author's note this happens when new-format data is reached).
    Returns the number of entries that were averaged; *me* is left untouched
    by the averaging step when that number is 0.
    """
    lectureCount = 0
    for item in lecContent:
        try:
            # Read all six categories before touching *me*, so an entry that
            # fails half-way does not leave partial sums behind.
            marks = [
                _countActiveMarks(item, field) for field in _RATING_FIELDS
            ]
        except AttributeError:
            break
        for field, count in zip(_RATING_FIELDS, marks):
            setattr(me, field, getattr(me, field) + count)
        lectureCount += 1

    if lectureCount:
        # float() avoids Python 2 integer truncation of the averages.
        for field in _RATING_FIELDS:
            setattr(me, field, getattr(me, field) / float(lectureCount))
    return lectureCount


def _indexRecommendable(lectureList, minTotal=2.5):
    """Map lectureID -> first lecture in *lectureList* with total >= minTotal."""
    index = {}
    for lecture in lectureList:
        if float(lecture.total) < minTotal:
            continue
        index.setdefault(lecture.lectureID, lecture)
    return index


def view():
    """Recommend lectures based on the logged-in user's own evaluations.

    Reads 'userId' and 'userPw' from request.form, crawls the user's
    evaluation page, averages the user's ratings into a Person, asks
    topMatches() for similar people and collects the lectures of those
    people whose total score is at least 2.5.

    Returns the rendered 'ShowLectures.html' template with ``classList``
    ordered by similarity * total / 5, highest first.  An empty list is
    rendered when the user has no evaluation that can be parsed.
    """
    # NOTE(security): pickle.load can execute arbitrary code; objectData.pkl
    # must only ever be produced by this application.
    with open('objectData.pkl', 'rb') as dataFile:
        lectureList = pickle.load(dataFile)
        personList = pickle.load(dataFile)

    # Fetch the credentials submitted with the form
    userId = request.form['userId']
    userPw = request.form['userPw']

    # Crawl the user's evaluation page through the ghost session
    a = KuKlueCrawler(id=userId, pw=userPw)
    a.openPage('http://klue.kr/myLectureEval.php')
    page, resource = a.ghost.evaluate(
        "document.getElementsByClassName('mainContent');")
    page = unicode(page[PyQt4.QtCore.QString(u'0')][PyQt4.QtCore.QString(
        u'outerHTML')]).encode('utf-8')

    lec = BeautifulSoup(page)

    evalList = lec.find('div', {'class': ['lectureEvalList']})
    if evalList is None:
        lecContent = []
    else:
        lecContent = evalList.findAll('div', {'class': ['content']})
    if not lecContent:
        # Nothing to build a profile from; avoid indexing an empty list.
        return render_template('ShowLectures.html', classList=[])

    # Build the Person object describing the current user
    me = Person()
    me.nickName = lecContent[0].find('div', {
        'class': ['wrInfo']
    }).a.text.encode('utf-8')

    # Average the scores the user gave in his/her own evaluations
    if not _averageOwnRatings(me, lecContent):
        # No parsable evaluation: there is no profile to match against.
        return render_template('ShowLectures.html', classList=[])

    # Run the similarity algorithm; each result is (similarity, person)
    result = topMatches(personList, me)

    classList = []

    # Sums of the matched lectures' scores, used for the error report
    totalCount = 0
    expected = dict.fromkeys(_RATING_FIELDS, 0.0)

    # Collect the lectures taken by the similar people
    lectureById = _indexRecommendable(lectureList)
    for person in result:
        similarity = person[0]
        for lectureID in person[1].lectureList:
            oneLecture = lectureById.get(lectureID)
            if oneLecture is None:
                continue
            for field in _RATING_FIELDS:
                expected[field] += float(getattr(oneLecture, field))
            totalCount += 1
            classList.append(
                (similarity * float(oneLecture.total) / 5, oneLecture))

    if totalCount:
        # Difference between the user's averages and the matched lectures'
        # averages; skipped when nothing matched to avoid dividing by zero.
        mean = dict(
            (field, expected[field] / totalCount) for field in _RATING_FIELDS)
        print("")
        print("Total Eval Error : " + str(me.total - mean['total']))
        print("difficulty Error : " + str(me.difficulty - mean['difficulty']))
        print("studyTime Error : " + str(me.studyTime - mean['studyTime']))
        print("attendance Error : " + str(me.attendance - mean['attendance']))
        print("grade Error : " + str(me.grade - mean['grade']))
        print("achievement Error " + str(me.achievement - mean['achievement']))
        print("")

    # Sort on the score only, so ties never fall back to comparing lecture
    # objects
    classList.sort(key=lambda scored: scored[0], reverse=True)
    classList = [scored[1] for scored in classList]  # drop similarity*total

    # page rendering
    return render_template('ShowLectures.html', classList=classList)