Exemple #1
0
def getAllCourse():
    """Crawl all Chinese MOOC course-list pages and store the courses in MySQL.

    The first list page is fetched to learn the total page count, the
    ``chinesemooc`` table is dropped and recreated, and every page's course
    list is passed to ``getAllCourseInfo`` together with the insert template.
    Pages after the first that cannot be downloaded or parsed are skipped.

    Returns:
        None.  An error line is printed and nothing is written to the
        database when the first page cannot be fetched or parsed.
    """
    import ast  # local import: only this function needs it

    def parse_page(raw_page):
        # The response body is read as a Python literal.  ast.literal_eval
        # accepts the dict/list/str/number literals that eval() accepted here,
        # but it cannot execute code embedded in the remote response.
        try:
            if isinstance(raw_page, bytes) and not isinstance(raw_page, str):
                # Python 3 byte payloads must be decoded before parsing.
                raw_page = raw_page.decode('utf-8')
            return ast.literal_eval(raw_page)
        except (ValueError, SyntaxError):
            return None

    # Fetch the first course-list page; it also carries the total page count.
    url = "http://www.chinesemooc.org/api/search_by_classid.php?classid=all"
    user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0"
    header = {'User-Agent': user_agent}
    html_page = getHtml(url, header)
    if html_page is None:
        print("ERROR:Get data from Chinese Mooc failed...")
        return

    course_list = parse_page(html_page)
    if not isinstance(course_list, dict):
        print("ERROR:Get data from Chinese Mooc failed...")
        return

    # Fall back to a single page when the response does not report a total.
    total_page_num = 1
    msg = course_list.get('msg')
    if isinstance(msg, dict) and 'page_total' in msg:
        total_page_num = int(msg['page_total'])

    conn = MySqlHelper.connect()
    cur = conn.cursor()
    cur.execute('drop table if exists chinesemooc')
    cur.execute(
        'create table chinesemooc(course_id int(11) primary key,course_title varchar(255),'
        'course_term varchar(255),course_outline text,course_view_num int(11),course_comment_num int(11),'
        'course_price int(11),signup int(11),course_des text,teacher_info text,assistant text,school varchar(255))'
    )
    sql = (
        'insert into chinesemooc(course_id,course_title,course_term,course_outline,course_view_num,course_comment_num,'
        'course_price,signup,course_des,teacher_info,assistant,school) values(%d,"%s","%s","%s",%d,%d,%d,%d,"%s","%s","%s","%s")'
    )
    getAllCourseInfo(course_list, header, cur, sql)

    # Page 1 is already handled above; walk the remaining pages.
    for page_index in range(2, total_page_num + 1):
        page_url = url + "&page=" + str(page_index)
        tmp_page = getHtml(page_url, header)
        if tmp_page is None:
            continue

        tmp_course_list = parse_page(tmp_page)
        if tmp_course_list is None:
            continue

        getAllCourseInfo(tmp_course_list, header, cur, sql)
        # Pause between list pages to avoid hammering the site.
        time.sleep(10)
    MySqlHelper.finish(conn)
Exemple #2
0
def getAllCourseInfo(course_list_dic,header,cur,sql):
    """Collect every course on one list page and insert each into the database.

    Args:
        course_list_dic: Parsed list-page response.  Courses are read from
            ``course_list_dic['msg']['list']``; the call is a no-op when the
            value is not a dict or lacks that structure.
        header: HTTP headers forwarded to ``parseCoursePage``.
        cur: Database cursor handed to ``MySqlHelper.insert_one``.
        sql: Insert template with twelve ``%`` placeholders, filled in the
            order course_id, course_title, course_term, course_outline,
            course_view_num, course_comment_num, course_price, signup,
            course_des, teacher_info, assistant, school.

    Returns:
        None.
    """
    if not isinstance(course_list_dic, dict):
        return
    msg = course_list_dic.get('msg')
    if not isinstance(msg, dict) or 'list' not in msg:
        return

    # Phase 1: build one Item per course (list-page fields + detail page).
    item_list = []
    for course in msg['list']:
        item = Item()
        item.course_id = int(course['kvideoid'])
        print(item.course_id)
        item.course_view_num = int(course['viewnum'])
        print(item.course_view_num)
        item.course_title = uni2utf(course['subject'])
        print(item.course_title)
        item.course_price = int(course['price'])
        print(item.course_price)
        # The insert below reads ``signup`` and ``course_comment_num``, so the
        # parsed values are stored under those names.  The older attribute
        # names are kept too in case other code still reads them.
        item.course_signup = int(course['signup'])
        item.signup = item.course_signup
        print(item.course_signup)
        item.comment_num = int(course['comment_num'])
        item.course_comment_num = item.comment_num
        print(item.comment_num)
        item.course_des = uni2utf(course['kvideo_desc'])
        print(item.course_des)
        item.school = uni2utf(course['teacher_info']['school_name'])
        print(item.school)

        # The detail page fills in the remaining fields on ``item``.
        tmp_url = "http://www.chinesemooc.org/mooc/" + str(item.course_id)
        parseCoursePage(tmp_url, header, item)
        item_list.append(item)

    # Phase 2: insert the collected records.
    for record_item in item_list:
        values = (
            record_item.course_id,
            record_item.course_title,
            record_item.course_term,
            record_item.course_outline,
            record_item.course_view_num,
            record_item.course_comment_num,
            record_item.course_price,
            record_item.signup,
            record_item.course_des,
            record_item.teacher_info,
            record_item.assistant,
            record_item.school,
        )
        # NOTE(review): the statement is built by string formatting from
        # scraped text, so a double quote in any field breaks the SQL.
        # Switch to a parameterized query once it is confirmed that
        # MySqlHelper.insert_one accepts bound parameters.
        MySqlHelper.insert_one(cur, sql % values, None)
Exemple #3
0
 def __init__(self, url):
     """Store the target URL and set up the DB helper and HTTP request headers."""
     browser_agent = (
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
         '(KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
     )
     self.url = url
     self.sql = MySqlHelper.MySQL_Utils()
     # Headers presenting the client as a desktop Chrome browser.
     self.header = {'User-Agent': browser_agent, 'Connection': 'keep-alive'}
Exemple #4
0
 def get_now_time(self):
     """Write the current local time into the ``starttime`` column of machine 30038935.

     Opens a new ``MySqlHelper.MySqlHelper`` connection (kept on
     ``self.sqlhelp``), queries the machine row, updates its ``starttime``
     and prints the timestamp that was written.
     """
     # Connect to the Alibaba Cloud MySQL database: public IP 47.96.104.151,
     # port 3306, database noodle, table user.
     # Table user -->> id, username, password, loigntime, idDelete
     # NOTE(review): the comment above mentions table ``user`` but the
     # statements below use ``machines``; host and credentials are hard-coded.
     db_args = ('47.96.104.151', 3306, 'root', 'mysql', 'noodle')
     self.sqlhelp = MySqlHelper.MySqlHelper(*db_args)

     # The row is fetched but its result is not used afterwards.
     select_stmt = "select * from machines where sn='30038935'"
     self.sqlhelp.get(select_stmt, [])

     now_text = QDateTime.currentDateTime().toString("yyyy-MM-dd hh:mm:ss")
     update_stmt = "update machines set starttime=%s where sn='30038935'"
     self.sqlhelp.cud(update_stmt, [now_text])
     print(now_text)
Exemple #5
0
def main():
    """Prompt for a page count, scrape proxy IPs from those pages and store them.

    The page count is read from standard input, passed through
    ``__GetHtml__`` -> ``__GetIPList__`` -> ``__GetIPInfo__``, and the result
    is handed to ``ExecuteNonQryText`` with "free_agent_IP_ins.sql".  The
    value returned by that call is printed as the result count.
    """
    # TODO (translated from the original note): change this so a BV id,
    # a video link or a video name can be entered.
    print('请输入想要爬取页数:')
    page_count = int(input())

    ip_list = __GetIPList__(__GetHtml__(page_count))
    ip_records = __GetIPInfo__(ip_list)

    db = MySqlHelper.DBHelper(flag=1)
    inserted = db.ExecuteNonQryText("free_agent_IP_ins.sql", ip_records)

    print('爬取结果:')
    print("共{0}条".format(inserted))
Exemple #6
0
def write2SQL(item):
    """
    Insert one record into the ``testdb.maoyan`` table.

    Reads the ``title``, ``stars`` and ``releasetime`` keys from ``item``,
    runs the insert through ``MySqlHelper.DBHelper.execute`` and prints a
    success or failure message depending on whether the call returned True.
    """
    dbhelper = MySqlHelper.DBHelper()
    # Column order is (title, actor, time).
    params = (item['title'], item['stars'], item['releasetime'])
    sql = "INSERT INTO testdb.maoyan(title,actor,time) VALUES(%s,%s,%s)"
    outcome = dbhelper.execute(sql, params)
    print("插入成功" if outcome == True else "插入失败")
Exemple #7
0
 def __init__(self):
     """Register PyMySQL under the MySQLdb name, then create the SQL helper."""
     from pymysql import install_as_MySQLdb

     # Must run before the helper is created so MySQLdb imports resolve to PyMySQL.
     install_as_MySQLdb()
     self.sqlHelper = MySqlHelper.MySQL_Utils()