def start(self):
    """Rebuild the ``fudan`` table from the course list in ``getContent()``.

    The table is dropped and re-created on every run.  For each entry of
    ``content["course"]`` whose ``about`` path does not contain "lecture",
    the course page is fetched and one row is inserted: the title, the
    ``label:text`` pairs from ``getInfo1`` and ``getInfo2`` (the latter
    padded with empty strings up to four entries), the concatenated teacher
    information and the page URL.
    """
    catalog = json.loads(self.getContent())
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists fudan')
    cur.execute('create table if not exists fudan(id int(11) primary key auto_increment,title varchar(255),lesson_code varchar(255),start_time varchar(255),current_sem varchar(255),spend_time varchar(255),short_desc text,knowledge_res text,chapter_info text,common_prob text,teacher_info text,url varchar(255))')
    sql = 'insert into fudan(title,lesson_code,start_time,current_sem,spend_time,short_desc,knowledge_res,chapter_info,common_prob,teacher_info,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

    for course in catalog["course"]:
        # Entries whose "about" path mentions "lecture" are not stored here.
        if course["about"].find("lecture") != -1:
            continue

        url = "http://fudan.xuetangx.com" + course["about"]
        page = self.getPage(url)
        value = [self.getTitle(page)]

        value.extend(
            pair[0] + ':' + self.tool.replace(pair[1])
            for pair in self.getInfo1(page)
        )

        details = self.getInfo2(page)
        value.extend(
            pair[0] + ':' + re.sub(r'[\n\t]+', r'\n', self.tool.replace(pair[1]), flags=re.S)
            for pair in details
        )
        # Pad the detail columns so the row always carries four of them.
        value.extend([''] * (4 - len(details)))

        value.append("".join(
            person[0] + '\n' + person[1] + '\n' + self.tool.replace(person[2]) + '\n'
            for person in self.getTeacherInfo(page)
        ))
        value.append(url)
        MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Rebuild the ``pmphmooc_open`` table from the JSON returned by ``getJson()``.

    The table is dropped and re-created, then one row is inserted per entry
    of ``content["rows"]``.  ``username``, ``touxian`` and ``resume`` are
    optional keys: they are copied onto the ``Item`` only when present, so
    otherwise whatever value ``Item`` already holds for them is stored
    (presumably a class-level default -- confirm against ``Item``).
    """
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists pmphmooc_open')
    cur.execute('create table pmphmooc_open(id int(11) primary key auto_increment,title varchar(255),school varchar(255),teacher varchar(255),touxian varchar(255),resume text,hitcount varchar(255),url varchar(255))')
    sql = 'insert into pmphmooc_open(title,school,teacher,touxian,resume,hitcount,url) values(%s,%s,%s,%s,%s,%s,%s)'

    # JSON key -> Item attribute for the fields that may be missing.
    optional_fields = (
        ("username", "teacher"),
        ("touxian", "touxian"),
        ("resume", "resume"),
    )

    content = json.loads(self.getJson())
    for row in content["rows"]:
        oneline = Item()
        oneline.title = row["name"]
        oneline.url = 'http://www.pmphmooc.com/web/opencoursedetail?courseid=' + str(row["id"])
        oneline.school = row["agencyname"]
        oneline.hitcount = row["hitcount"]
        for key, attr in optional_fields:
            # `in` replaces dict.has_key(), which was removed in Python 3.
            if key in row:
                setattr(oneline, attr, row[key])

        value = [
            oneline.title,
            oneline.school,
            oneline.teacher,
            oneline.touxian,
            oneline.resume,
            oneline.hitcount,
            oneline.url,
        ]
        MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Rebuild the ``pmphmooc_open`` table from the JSON returned by ``getJson()``.

    The table is dropped and re-created, then one row is inserted per entry
    of ``content["rows"]``.  ``username``, ``touxian`` and ``resume`` are
    optional keys: they are copied onto the ``Item`` only when present, so
    otherwise whatever value ``Item`` already holds for them is stored
    (presumably a class-level default -- confirm against ``Item``).
    """
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists pmphmooc_open')
    cur.execute('create table pmphmooc_open(id int(11) primary key auto_increment,title varchar(255),school varchar(255),teacher varchar(255),touxian varchar(255),resume text,hitcount varchar(255),url varchar(255))')
    sql = 'insert into pmphmooc_open(title,school,teacher,touxian,resume,hitcount,url) values(%s,%s,%s,%s,%s,%s,%s)'

    # JSON key -> Item attribute for the fields that may be missing.
    optional_fields = (
        ("username", "teacher"),
        ("touxian", "touxian"),
        ("resume", "resume"),
    )

    content = json.loads(self.getJson())
    for row in content["rows"]:
        oneline = Item()
        oneline.title = row["name"]
        oneline.url = 'http://www.pmphmooc.com/web/opencoursedetail?courseid=' + str(row["id"])
        oneline.school = row["agencyname"]
        oneline.hitcount = row["hitcount"]
        for key, attr in optional_fields:
            # `in` replaces dict.has_key(), which was removed in Python 3.
            if key in row:
                setattr(oneline, attr, row[key])

        value = [
            oneline.title,
            oneline.school,
            oneline.teacher,
            oneline.touxian,
            oneline.resume,
            oneline.hitcount,
            oneline.url,
        ]
        MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Rebuild the ``mooccollege`` table from listing pages 1 through 4.

    The table is dropped and re-created.  Each page number corresponds to a
    fixed course category; every ``(title, teacher, school)`` entry that
    ``getInfo`` extracts from a page is inserted with that page's category.
    """
    # Category label for pages 1-3; any other page falls back to the last label.
    category_by_page = {1: "冲刺专题", 2: "考题解析", 3: "同步教材"}

    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists mooccollege')
    cur.execute('create table mooccollege(id int(11) primary key auto_increment,title varchar(255),teacher varchar(255),school varchar(255),type varchar(255))')
    sql = 'insert into mooccollege(title,teacher,school,type) values(%s,%s,%s,%s)'

    for page_no in range(1, 5):
        oneline = Item()
        page = self.getPage(page_no)
        for entry in self.getInfo(page):
            oneline.title = entry[0]
            oneline.teacher = entry[1]
            oneline.school = entry[2]
            oneline.type = category_by_page.get(page_no, "知识模块")
            value = [oneline.title, oneline.teacher, oneline.school, oneline.type]
            MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Rebuild the ``computer_icourses`` table from http://computer.icourses.cn/.

    The index page is fetched first and ``getURL`` yields the course URLs.
    The table is dropped and re-created, then each course page is fetched
    and one row is inserted holding the title, short description, the
    recognised info sections, the teacher names (one per line) and the URL.
    """
    # Section heading on the page -> Item attribute that stores its text.
    section_attrs = (
        ('课程概述', 'description'),
        ('证书要求', 'requirement'),
        ('预备知识', 'pre_knowledge'),
        ('授课大纲', 'chapter'),
        ('参考资料', 'reference'),
        ('常见问题', 'common_prob'),
    )
    columns = (
        'title', 'short_desc', 'description', 'requirement', 'pre_knowledge',
        'chapter', 'reference', 'common_prob', 'teacher', 'url',
    )

    indexPage = self.getPage('http://computer.icourses.cn/')
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists computer_icourses')
    cur.execute('create table computer_icourses(id int(11) primary key auto_increment,title varchar(255),short_desc text,description text,requirement text,pre_knowledge text,chapter text,reference text,common_prob text,teacher text,url varchar(255))')
    sql = 'insert into computer_icourses(title,short_desc,description,requirement,pre_knowledge,chapter,reference,common_prob,teacher,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

    for course_url in self.getURL(indexPage):
        oneline = Item()
        oneline.url = course_url
        page = self.getPage(course_url)
        oneline.title = self.getTitle(page)
        oneline.short_desc = self.getShortDesc(page)

        for section in self.getInfo(page):
            for heading, attr in section_attrs:
                if section[0] == heading:
                    cleaned = re.sub(self.tool.replaceNBSP, " ", self.tool.replace(section[1]))
                    setattr(oneline, attr, cleaned)

        oneline.teacher = "".join(name + '\n' for name in self.getTeacher(page))

        value = [getattr(oneline, column) for column in columns]
        MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def getCourceInfo(self):
    """Rebuild the ``jisuanke`` table from the course URLs found at ``self.url``.

    The table is dropped and re-created.  For every course URL (given
    without scheme, so "http:" is prepended) the page is fetched and one
    row is inserted: title, duration, learner count, description and an
    outline built from the sections returned by ``getInfo``.
    """
    li_pattern = re.compile(r'<li>(.*?)</li>', re.S)

    cource_url_list = self.getCourceUrl(self.url)
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists jisuanke')
    cur.execute('create table jisuanke(id int(11) primary key auto_increment,title varchar(255),time varchar(255),learn_count varchar(255),short_desc text,outline text)')
    sql = 'insert into jisuanke(title,time,learn_count,short_desc,outline) values(%s,%s,%s,%s,%s)'

    for pageurl in cource_url_list:
        page = self.getPageInfo("http:" + pageurl)

        value = []
        value.append(self.removeTab(self.getCourceName(page)[0]))
        value.append(self.removeTab(self.getCourceTime(page)[0]))
        value.append(self.getPeopleNum(page)[0])
        value.append(self.getClassInfo(page))

        # Outline: "<name>:<summary>" per section followed by its <li> items.
        outline_parts = []
        for section in self.getInfo(page) or []:
            outline_parts.append(section[0] + ':' + section[1] + '\n')
            outline_parts.extend(li_pattern.findall(section[2]))
        value.append("".join(outline_parts))

        MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Rebuild the ``computer_icourses`` table from http://computer.icourses.cn/.

    The index page is fetched first and ``getURL`` yields the course URLs.
    The table is dropped and re-created, then each course page is fetched
    and one row is inserted holding the title, short description, the
    recognised info sections, the teacher names (one per line) and the URL.
    """
    # Section heading on the page -> Item attribute that stores its text.
    section_attrs = (
        ('课程概述', 'description'),
        ('证书要求', 'requirement'),
        ('预备知识', 'pre_knowledge'),
        ('授课大纲', 'chapter'),
        ('参考资料', 'reference'),
        ('常见问题', 'common_prob'),
    )
    columns = (
        'title', 'short_desc', 'description', 'requirement', 'pre_knowledge',
        'chapter', 'reference', 'common_prob', 'teacher', 'url',
    )

    indexPage = self.getPage('http://computer.icourses.cn/')
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists computer_icourses')
    cur.execute('create table computer_icourses(id int(11) primary key auto_increment,title varchar(255),short_desc text,description text,requirement text,pre_knowledge text,chapter text,reference text,common_prob text,teacher text,url varchar(255))')
    sql = 'insert into computer_icourses(title,short_desc,description,requirement,pre_knowledge,chapter,reference,common_prob,teacher,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

    for course_url in self.getURL(indexPage):
        oneline = Item()
        oneline.url = course_url
        page = self.getPage(course_url)
        oneline.title = self.getTitle(page)
        oneline.short_desc = self.getShortDesc(page)

        for section in self.getInfo(page):
            for heading, attr in section_attrs:
                if section[0] == heading:
                    cleaned = re.sub(self.tool.replaceNBSP, " ", self.tool.replace(section[1]))
                    setattr(oneline, attr, cleaned)

        oneline.teacher = "".join(name + '\n' for name in self.getTeacher(page))

        value = [getattr(oneline, column) for column in columns]
        MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Rebuild the ``pmphmooc`` table from the JSON returned by ``getJson()``.

    The table is dropped and re-created.  For each entry of
    ``content["rows"]`` the course page is fetched by id and one row is
    inserted with the description, begin/end dates, total time and load,
    teachers, the raw content block and, when one of the chapter headings
    yields text, the chapter outline.
    """
    columns = (
        'title', 'description', 'chapter', 'course_begin', 'course_end',
        'course_totaltime', 'course_load', 'teacher', 'block', 'url',
    )

    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists pmphmooc')
    cur.execute('create table pmphmooc(id int(11) primary key auto_increment,title varchar(255),description text,chapter text,course_begin varchar(255),course_end varchar(255),course_totaltime varchar(255),course_load varchar(255),teacher text,block text,url varchar(255))')
    sql = 'insert into pmphmooc(title,description,chapter,course_begin,course_end,course_totaltime,course_load,teacher,block,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

    content = json.loads(self.getJson())
    for row in content["rows"]:
        oneline = Item()
        oneline.title = row["name"]
        oneline.url = 'http://www.pmphmooc.com/web/scholl/' + str(row["id"])
        page = self.getPage(row["id"])

        oneline.description = self.tool.replace(self.getDescription(page))

        # When several (begin, end) pairs are found, the last one wins.
        for span in self.getBeginAndEnd(page):
            oneline.course_begin = span[0]
            oneline.course_end = span[1]

        totalAndLoad = self.getTotalAndLoad(page)
        oneline.course_totaltime = totalAndLoad[0]
        oneline.course_load = totalAndLoad[1]

        oneline.teacher = "".join(
            person[0] + '\n' + person[1] + '\n' for person in self.getTeacher(page)
        )

        block = self.getBlock(page)
        oneline.block = block
        chapter = self.getText(block, ["授课大纲", "课程章节"])
        if chapter:
            oneline.chapter = re.sub(self.tool.replaceNBSP, "", self.tool.replace(chapter))

        value = [getattr(oneline, column) for column in columns]
        MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Rebuild the ``imooc`` table by walking every listing page.

    Page 1 is fetched first to learn the page count.  The table is dropped
    and re-created, then for every course id on every listing page the
    "learn" and "view" pages are fetched and one row is inserted: title,
    the first three cleaned values from ``getLevelTimeAndCount`` (stored as
    difficulty, time and learner count), the brief and a newline-separated
    outline.
    """
    li_pattern = re.compile('<li>(.*?)</li>', re.S)

    first_page = self.getContent(1)
    pageNum = self.getPageNum(first_page)
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists imooc')
    cur.execute('create table imooc(id int(11) primary key auto_increment,title varchar(255),difficulty varchar(255),time varchar(255),learn_count varchar(255),short_desc text,outline text)')
    sql = 'insert into imooc(title,difficulty,time,learn_count,short_desc,outline) values(%s,%s,%s,%s,%s,%s)'

    for page_no in range(1, int(pageNum) + 1):
        listing = self.getContent(page_no)
        for course_id in self.getViewsId(listing):
            learnpage = self.getLearnPage(course_id)
            viewpage = self.getViewPage(course_id)

            value = [self.getTitle(learnpage)]
            infos = [self.tool.replace(raw) for raw in self.getLevelTimeAndCount(learnpage)]
            value.extend([infos[0], infos[1], infos[2]])
            value.append(self.getBrief(viewpage))

            # Outline: chapter heading, then one line per <li> entry.
            outline_parts = []
            for chapter in self.getOutline(learnpage):
                outline_parts.append(self.tool.replace(chapter[0]) + '\n')
                for entry in li_pattern.findall(chapter[1]):
                    entry = re.sub(self.tool.removeAddr, "", entry)
                    entry = re.sub(self.tool.replaceLT, "<", entry)
                    entry = re.sub(self.tool.replaceGT, ">", entry)
                    outline_parts.append(entry.strip() + '\n')
            value.append("".join(outline_parts))

            MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Rebuild the ``imooc`` table by walking every listing page.

    Page 1 is fetched first to learn the page count.  The table is dropped
    and re-created, then for every course id on every listing page the
    "learn" and "view" pages are fetched and one row is inserted: title,
    the first three cleaned values from ``getLevelTimeAndCount`` (stored as
    difficulty, time and learner count), the brief and a newline-separated
    outline.
    """
    li_pattern = re.compile('<li>(.*?)</li>', re.S)

    first_page = self.getContent(1)
    pageNum = self.getPageNum(first_page)
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists imooc')
    cur.execute('create table imooc(id int(11) primary key auto_increment,title varchar(255),difficulty varchar(255),time varchar(255),learn_count varchar(255),short_desc text,outline text)')
    sql = 'insert into imooc(title,difficulty,time,learn_count,short_desc,outline) values(%s,%s,%s,%s,%s,%s)'

    for page_no in range(1, int(pageNum) + 1):
        listing = self.getContent(page_no)
        for course_id in self.getViewsId(listing):
            learnpage = self.getLearnPage(course_id)
            viewpage = self.getViewPage(course_id)

            value = [self.getTitle(learnpage)]
            infos = [self.tool.replace(raw) for raw in self.getLevelTimeAndCount(learnpage)]
            value.extend([infos[0], infos[1], infos[2]])
            value.append(self.getBrief(viewpage))

            # Outline: chapter heading, then one line per <li> entry.
            outline_parts = []
            for chapter in self.getOutline(learnpage):
                outline_parts.append(self.tool.replace(chapter[0]) + '\n')
                for entry in li_pattern.findall(chapter[1]):
                    entry = re.sub(self.tool.removeAddr, "", entry)
                    entry = re.sub(self.tool.replaceLT, "<", entry)
                    entry = re.sub(self.tool.replaceGT, ">", entry)
                    outline_parts.append(entry.strip() + '\n')
            value.append("".join(outline_parts))

            MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def getCpuAndMemory(self):
    """Aggregate CPU/VSS/RSS samples for ``self.packageName`` and store them.

    Lines read from ``self.file`` whose 10th whitespace-separated field
    equals the package name are copied to ``self.infofile``, which is then
    closed.  ``self.saveFileName`` is re-read and columns 3, 6 and 7 are
    parsed as integers after dropping their last character (presumably a
    unit suffix such as "%" or "K" -- confirm against the capture format).
    Max and average values are printed and written with an UPDATE on the
    table named ``self.appName`` for the row matching ``self.deviceModel``.

    Raises:
        ValueError: if no sample for the package was found (the original
            failed with the same exception type from ``max()`` on an empty
            list, only with a less helpful message).
    """
    packageName = self.packageName
    source = self.file
    infofile = self.infofile

    try:
        line = source.readline()
        while line:
            fields = line.split()
            # Blank or short lines (headers, separators) have no 10th field;
            # indexing them unguarded raised IndexError.
            if len(fields) > 9 and fields[9] == packageName:
                infofile.write(line)
            line = source.readline()
    finally:
        infofile.close()

    list_cpu = []
    list_vss = []
    list_rss = []
    # `with` guarantees the handle is released even if a line fails to parse.
    with open(self.saveFileName) as resultFile:
        for resultLine in resultFile:
            columns = resultLine.split()
            if len(columns) < 7:
                continue
            list_cpu.append(int(columns[2][:-1]))
            list_vss.append(int(columns[5][:-1]))
            list_rss.append(int(columns[6][:-1]))

    if not list_cpu:
        raise ValueError("no samples found for package %r in %s"
                         % (packageName, self.saveFileName))

    cpumax = max(list_cpu)
    cpuavg = "%.2f" % (float(sum(list_cpu)) / len(list_cpu))
    vsizemax = max(list_vss)
    # Explicit floor division keeps the integer averages on Python 2 and 3.
    vsizeavg = sum(list_vss) // len(list_vss)
    rssmax = max(list_rss)
    rssavg = sum(list_rss) // len(list_rss)
    print("%s %s %s %s %s %s" % (cpumax, cpuavg, vsizemax, vsizeavg, rssmax, rssavg))

    # NOTE(review): the statement is built by string interpolation; the
    # values come from this object, but switch to a parameterised query if
    # MysqlHelper.update supports one.
    sqlquery = ("UPDATE %s " +
                "SET cpumax = '%s',cpuavg = '%s'," +
                "vsizemax = '%s',vsizeavg = '%s'," +
                "rssmax = '%s',rssavg = '%s' " +
                "WHERE devicemodel = '%s' ") % (
        self.appName, cpumax, cpuavg, vsizemax, vsizeavg,
        rssmax, rssavg, self.deviceModel)
    print(sqlquery)

    cxn = MysqlHelper.connect()
    cur = cxn.cursor()
    MysqlHelper.update(cur, sqlquery)
    MysqlHelper.finish(cxn)
def getCpuAndMemory(self):
    """Aggregate CPU/VSS/RSS samples for ``self.packageName`` and store them.

    Lines read from ``self.file`` whose 10th whitespace-separated field
    equals the package name are copied to ``self.infofile``, which is then
    closed.  ``self.saveFileName`` is re-read and columns 3, 6 and 7 are
    parsed as integers after dropping their last character (presumably a
    unit suffix such as "%" or "K" -- confirm against the capture format).
    Max and average values are printed and written with an UPDATE on the
    table named ``self.appName`` for the row matching ``self.deviceModel``.

    Raises:
        ValueError: if no sample for the package was found (the original
            failed with the same exception type from ``max()`` on an empty
            list, only with a less helpful message).
    """
    packageName = self.packageName
    source = self.file
    infofile = self.infofile

    try:
        line = source.readline()
        while line:
            fields = line.split()
            # Blank or short lines (headers, separators) have no 10th field;
            # indexing them unguarded raised IndexError.
            if len(fields) > 9 and fields[9] == packageName:
                infofile.write(line)
            line = source.readline()
    finally:
        infofile.close()

    list_cpu = []
    list_vss = []
    list_rss = []
    # `with` guarantees the handle is released even if a line fails to parse.
    with open(self.saveFileName) as resultFile:
        for resultLine in resultFile:
            columns = resultLine.split()
            if len(columns) < 7:
                continue
            list_cpu.append(int(columns[2][:-1]))
            list_vss.append(int(columns[5][:-1]))
            list_rss.append(int(columns[6][:-1]))

    if not list_cpu:
        raise ValueError("no samples found for package %r in %s"
                         % (packageName, self.saveFileName))

    cpumax = max(list_cpu)
    cpuavg = "%.2f" % (float(sum(list_cpu)) / len(list_cpu))
    vsizemax = max(list_vss)
    # Explicit floor division keeps the integer averages on Python 2 and 3.
    vsizeavg = sum(list_vss) // len(list_vss)
    rssmax = max(list_rss)
    rssavg = sum(list_rss) // len(list_rss)
    print("%s %s %s %s %s %s" % (cpumax, cpuavg, vsizemax, vsizeavg, rssmax, rssavg))

    # NOTE(review): the statement is built by string interpolation; the
    # values come from this object, but switch to a parameterised query if
    # MysqlHelper.update supports one.
    sqlquery = ("UPDATE %s " +
                "SET cpumax = '%s',cpuavg = '%s'," +
                "vsizemax = '%s',vsizeavg = '%s'," +
                "rssmax = '%s',rssavg = '%s' " +
                "WHERE devicemodel = '%s' ") % (
        self.appName, cpumax, cpuavg, vsizemax, vsizeavg,
        rssmax, rssavg, self.deviceModel)
    print(sqlquery)

    cxn = MysqlHelper.connect()
    cur = cxn.cursor()
    MysqlHelper.update(cur, sqlquery)
    MysqlHelper.finish(cxn)
def start(self):
    """Rebuild the ``tsinghua`` table from the course list in ``getContent()``.

    The table is dropped and re-created on every run.  For each entry of
    ``content["course"]`` whose ``about`` path does not contain "lecture",
    the course page is fetched and one row is inserted: the title, the
    ``label:text`` pairs from ``getInfo1`` and ``getInfo2`` (the latter
    padded with empty strings up to four entries), the concatenated teacher
    information and the page URL.
    """
    catalog = json.loads(self.getContent())
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists tsinghua')
    cur.execute('create table if not exists tsinghua(id int(11) primary key auto_increment,title varchar(255),lesson_code varchar(255),start_time varchar(255),current_sem varchar(255),spend_time varchar(255),short_desc text,knowledge_res text,chapter_info text,common_prob text,teacher_info text,url varchar(255))')
    sql = 'insert into tsinghua(title,lesson_code,start_time,current_sem,spend_time,short_desc,knowledge_res,chapter_info,common_prob,teacher_info,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

    for course in catalog["course"]:
        # Entries whose "about" path mentions "lecture" are not stored here.
        if course["about"].find("lecture") != -1:
            continue

        url = "http://tsinghua.xuetangx.com" + course["about"]
        page = self.getPage(url)
        value = [self.getTitle(page)]

        value.extend(
            pair[0] + ':' + self.tool.replace(pair[1])
            for pair in self.getInfo1(page)
        )

        details = self.getInfo2(page)
        value.extend(
            pair[0] + ':' + re.sub(r'[\n\t]+', r'\n', self.tool.replace(pair[1]), flags=re.S)
            for pair in details
        )
        # Pad the detail columns so the row always carries four of them.
        value.extend([''] * (4 - len(details)))

        value.append("".join(
            person[0] + '\n' + person[1] + '\n' + self.tool.replace(person[2]) + '\n'
            for person in self.getTeacherInfo(page)
        ))
        value.append(url)
        MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Rebuild the ``xjtu`` and ``xjtu_lecture`` tables from ``getContent()``.

    ``xjtu`` is dropped and re-created, then one row is inserted per entry
    of ``content["course"]`` using the labelled sections found on the course
    page.  When ``content["lecture"]`` is non-empty, ``xjtu_lecture`` is
    likewise rebuilt with one row per lecture page.
    """
    # Page label -> Course attribute, for the two info blocks of a course page.
    basic_fields = (
        ("课程代码", "lesson_code"),
        ("开课时间", "start_time"),
        ("当前学期", "current_sem"),
        ("投入时间", "spend_time"),
    )
    detail_fields = (
        ("课程简介", "short_desc"),
        ("知识储备", "knowledge_res"),
        ("章节信息", "chapter_info"),
        ("常见问题", "common_prob"),
    )
    course_columns = (
        "title", "lesson_code", "start_time", "current_sem", "spend_time",
        "short_desc", "knowledge_res", "chapter_info", "common_prob",
        "teacher_info", "url",
    )
    lecture_columns = ("title", "intro", "guest", "video_info", "addr", "url")

    content = json.loads(self.getContent())
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists xjtu')
    cur.execute('create table if not exists xjtu(id int(11) primary key auto_increment,title varchar(255),lesson_code varchar(255),start_time varchar(255),current_sem varchar(255),spend_time varchar(255),short_desc text,knowledge_res text,chapter_info text,common_prob text,teacher_info text,url varchar(255))')
    sql = 'insert into xjtu(title,lesson_code,start_time,current_sem,spend_time,short_desc,knowledge_res,chapter_info,common_prob,teacher_info,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

    for course in content["course"]:
        oneline = Course()
        url = "http://xjtu.xuetangx.com" + course["about"]
        page = self.getPage(url)
        oneline.title = self.getTitle(page)

        for pair in self.getInfo1(page):
            for label, attr in basic_fields:
                if pair[0] == label:
                    setattr(oneline, attr, self.tool.replace(pair[1]))

        for pair in self.getInfo2(page):
            for label, attr in detail_fields:
                if pair[0] == label:
                    cleaned = re.sub(r'[\n\t]+', r'\n', self.tool.replace(pair[1]), flags=re.S)
                    setattr(oneline, attr, cleaned)

        oneline.teacher_info = "".join(
            person[0] + '\n' + person[1] + '\n' + self.tool.replace(person[2]) + '\n'
            for person in self.getTeacherInfo(page) or []
        )
        oneline.url = url

        value = [getattr(oneline, column) for column in course_columns]
        MysqlHelper.insert_one(cur, sql, value)

    if content["lecture"]:
        cur.execute('drop table if exists xjtu_lecture')
        cur.execute('create table if not exists xjtu_lecture(id int(11) primary key auto_increment,title varchar(255),intro text,guest text,video_info text,addr text,url varchar(255))')
        sql = 'insert into xjtu_lecture(title,intro,guest,video_info,addr,url) values(%s,%s,%s,%s,%s,%s)'
        for lecture in content["lecture"]:
            oneline = Lecture()
            url = "http://xjtu.xuetangx.com" + lecture["about"]
            page = self.getPage(url)
            oneline.title = self.getLectureTitle(page)
            oneline.intro = self.tool.replace(self.getLectureIntro(page))
            oneline.guest = self.tool.replace(self.getLectureGuest(page))
            oneline.video_info = self.tool.replace(self.getVideoInfo(page))
            oneline.addr = self.tool.replace(self.getLectureInfo(page))
            oneline.url = url

            value = [getattr(oneline, column) for column in lecture_columns]
            MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Insert course rows into the existing ``mooc`` table.

    The page count is read from listing page 1 (second-to-last element of
    ``getPageNum``) and listing pages from 78 up to that count are walked.
    The table is not dropped or created here.  For each course URL, the
    content block is searched for known section headings and the matching
    texts are stored on the ``Item`` before the row is inserted.
    """
    # Item attribute -> headings that may introduce that section.
    section_keywords = (
        ("brief", ("课程概述", "课程概况", "课程简介")),
        ("teacher", ("授课教师", "主讲教师")),
        ("chapter", ("授课大纲", "课程大纲")),
        ("require", ("先修要求", "先修知识", "背景知识")),
        ("form", ("授课形式",)),
        ("question", ("常见问题解答", "常见问题")),
        ("resource", ("参考资料",)),
    )
    columns = (
        "title_chinese", "title_english", "brief", "teacher", "chapter",
        "require", "form", "question", "resource", "url",
    )

    first_page = self.getContent(1)
    pageNum = self.getPageNum(first_page)
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    sql = 'insert into mooc(title_chinese,title_english,brief,teacher,chapter,requires,form,question,resource,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

    for page_no in range(78, int(pageNum[-2]) + 1):
        listing = self.getContent(page_no)
        for course_url in self.getURL(listing):
            oneline = Item()
            page = self.getPage(course_url)
            oneline.title_chinese = self.getTitle_chinese(page)
            oneline.title_english = self.getTitle_english(page)

            # The appended </div> is used to detect the end of the text.
            block = self.getBlock(page) + '</div>'

            for attr, headings in section_keywords:
                words = list(headings)
                if self.wordInText(block, words):
                    text = self.getText(block, words)
                    setattr(oneline, attr, self.tool.replace(text))

            oneline.url = course_url
            value = [getattr(oneline, column) for column in columns]
            MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
def start(self):
    """Rebuild the ``jxufe`` and ``jxufe_lecture`` tables from ``getContent()``.

    ``jxufe`` is dropped and re-created, then one row is inserted per entry
    of ``content["course"]`` using the labelled sections found on the course
    page.  When ``content["lecture"]`` is non-empty, ``jxufe_lecture`` is
    likewise rebuilt with one row per lecture page.
    """
    # Page label -> Course attribute, for the two info blocks of a course page.
    basic_fields = (
        ("课程代码", "lesson_code"),
        ("开课时间", "start_time"),
        ("当前学期", "current_sem"),
        ("投入时间", "spend_time"),
    )
    detail_fields = (
        ("课程简介", "short_desc"),
        ("知识储备", "knowledge_res"),
        ("章节信息", "chapter_info"),
        ("常见问题", "common_prob"),
    )
    course_columns = (
        "title", "lesson_code", "start_time", "current_sem", "spend_time",
        "short_desc", "knowledge_res", "chapter_info", "common_prob",
        "teacher_info", "url",
    )
    lecture_columns = ("title", "intro", "guest", "video_info", "addr", "url")

    content = json.loads(self.getContent())
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists jxufe')
    cur.execute('create table if not exists jxufe(id int(11) primary key auto_increment,title varchar(255),lesson_code varchar(255),start_time varchar(255),current_sem varchar(255),spend_time varchar(255),short_desc text,knowledge_res text,chapter_info text,common_prob text,teacher_info text,url varchar(255))')
    sql = 'insert into jxufe(title,lesson_code,start_time,current_sem,spend_time,short_desc,knowledge_res,chapter_info,common_prob,teacher_info,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

    for course in content["course"]:
        oneline = Course()
        url = "http://mooc.jxufe.edu.cn" + course["about"]
        page = self.getPage(url)
        oneline.title = self.getTitle(page)

        for pair in self.getInfo1(page):
            for label, attr in basic_fields:
                if pair[0] == label:
                    setattr(oneline, attr, self.tool.replace(pair[1]))

        for pair in self.getInfo2(page):
            for label, attr in detail_fields:
                if pair[0] == label:
                    cleaned = re.sub(r'[\n\t]+', r'\n', self.tool.replace(pair[1]), flags=re.S)
                    setattr(oneline, attr, cleaned)

        oneline.teacher_info = "".join(
            person[0] + '\n' + person[1] + '\n' + self.tool.replace(person[2]) + '\n'
            for person in self.getTeacherInfo(page) or []
        )
        oneline.url = url

        value = [getattr(oneline, column) for column in course_columns]
        MysqlHelper.insert_one(cur, sql, value)

    if content["lecture"]:
        cur.execute('drop table if exists jxufe_lecture')
        cur.execute('create table if not exists jxufe_lecture(id int(11) primary key auto_increment,title varchar(255),intro text,guest text,video_info text,addr text,url varchar(255))')
        sql = 'insert into jxufe_lecture(title,intro,guest,video_info,addr,url) values(%s,%s,%s,%s,%s,%s)'
        for lecture in content["lecture"]:
            oneline = Lecture()
            # NOTE(review): this base URL ends with "/" while the course base
            # above does not; if "about" starts with "/" the stored URL gets a
            # double slash -- confirm which form is intended.
            url = "http://mooc.jxufe.edu.cn/" + lecture["about"]
            page = self.getPage(url)
            oneline.title = self.getLectureTitle(page)
            oneline.intro = self.tool.replace(self.getLectureIntro(page))
            oneline.guest = self.tool.replace(self.getLectureGuest(page))
            oneline.video_info = self.tool.replace(self.getVideoInfo(page))
            oneline.addr = self.tool.replace(self.getLectureInfo(page))
            oneline.url = url

            value = [getattr(oneline, column) for column in lecture_columns]
            MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)
body = browser.find_element_by_xpath("/html/body/pre").text print("start " + stock) data = json.loads(body) chouma = data["data"]["result"]["result"] if len(chouma) > 0: chouma_data = chouma[0] row = helper.fetchone(sql_select, [stock]) if not row: helper.insert(sql_insert, chouma_data) print("insert " + stock) else: chouma_data.append(stock) helper.update(sql_update, chouma_data) time.sleep(0.01) print("update " + stock) except Exception as e: print(e) # raise e helper = MysqlHelper('okaiok.com', 'root', 'qq84607952', 'walle') helper.connect() sql_select = "SELECT stock FROM chouma WHERE stock= %s " sql_update = "UPDATE chouma SET CODE=%s,stock=%s,price=%s,zhangfu=%s,huoli=%s,costdown90=%s,costup90=%s,costavg=%s,focus70=%s,focus90=%s WHERE stock = %s " sql_insert = "insert into chouma (code,stock,price,zhangfu,huoli,costdown90,costup90,costavg,focus70,focus90) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)" for stock in list: url_name = quote(stock + "筹码分布") url = "http://www.iwencai.com/stockpick/load-data?typed=0&preParams=&ts=1&f=1&qs=result_original&selfsectsn=&querytype=stock&searchfilter=&tid=stockpick&w=" + url_name + "&queryarea=" spider_chouma(url, stock)
def start(self):
    """Insert course rows into the existing ``mooc`` table.

    The page count is read from listing page 1 (second-to-last element of
    ``getPageNum``) and listing pages from 78 up to that count are walked.
    The table is not dropped or created here.  For each course URL, the
    content block is searched for known section headings and the matching
    texts are stored on the ``Item`` before the row is inserted.
    """
    # Item attribute -> headings that may introduce that section.
    section_keywords = (
        ("brief", ("课程概述", "课程概况", "课程简介")),
        ("teacher", ("授课教师", "主讲教师")),
        ("chapter", ("授课大纲", "课程大纲")),
        ("require", ("先修要求", "先修知识", "背景知识")),
        ("form", ("授课形式",)),
        ("question", ("常见问题解答", "常见问题")),
        ("resource", ("参考资料",)),
    )
    columns = (
        "title_chinese", "title_english", "brief", "teacher", "chapter",
        "require", "form", "question", "resource", "url",
    )

    first_page = self.getContent(1)
    pageNum = self.getPageNum(first_page)
    conn = MysqlHelper.connect()
    cur = conn.cursor()
    sql = 'insert into mooc(title_chinese,title_english,brief,teacher,chapter,requires,form,question,resource,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

    for page_no in range(78, int(pageNum[-2]) + 1):
        listing = self.getContent(page_no)
        for course_url in self.getURL(listing):
            oneline = Item()
            page = self.getPage(course_url)
            oneline.title_chinese = self.getTitle_chinese(page)
            oneline.title_english = self.getTitle_english(page)

            # The appended </div> is used to detect the end of the text.
            block = self.getBlock(page) + '</div>'

            for attr, headings in section_keywords:
                words = list(headings)
                if self.wordInText(block, words):
                    text = self.getText(block, words)
                    setattr(oneline, attr, self.tool.replace(text))

            oneline.url = course_url
            value = [getattr(oneline, column) for column in columns]
            MysqlHelper.insert_one(cur, sql, value)

    MysqlHelper.finish(conn)