Python Job Examples

Programming Language: Python

Namespace/Package Name: base

Method/Function: Job

Examples at hotexamples.com: 4

Python Job - 4 examples found. These are the top-rated real-world Python examples of base.Job extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: Comics.py Project: txz19881104/python

    def _GetContentByKeyword(self, keyword, mode, download_path=None):
        """Search the site for ``keyword`` and queue a job for every chapter found.

        The search form is posted GBK-encoded to ``<self._url>/e/search/``. For
        each hit the comic page is fetched, its chapter links are collected and
        one ``base.Job`` per chapter is put on ``priority_queue``.

        Parameters
        ----------
        keyword : str
            Text to search for.
        mode : str
            "download" : skip comics already present in ComicName, download the
                         cover, insert a ComicName row and read back its ID
                         into ``self.id`` before queueing the chapters.
            "update"   : refresh the ``Time`` column of the row ``self.id`` and
                         queue the chapters.
        download_path : str, optional
            Root directory for downloads. The cover is only downloaded when
            this is given.

        Returns
        -------
        bool
            False when the search page or a comic page request returns None,
            True otherwise.
        """
        # NOTE(review): every SQL statement below is assembled by string
        # formatting from text scraped off the remote page. A title containing
        # a double quote breaks the statement; consider a parameterised helper.

        # Request the search page for the keyword.
        self.keyword       = keyword
        self.download_path = download_path
        url_keyword        = self._url + '/e/search/'

        params = {
            'key': keyword.encode('gbk', 'strict'),
            'button': "搜索漫画".encode('gbk', 'strict'),
        }
        params = parse.urlencode(params).encode("gbk")
        content_keyword = BaseRequest.PostUrlSoup(url_keyword, params, 'gbk')
        if content_keyword is None:
            return False

        a_result = content_keyword.find_all('p', {'class': 'fl cover'})

        # Each hit carries the comic name (img alt) and the link to its page.
        for data in a_result:
            comic_name = data.a.img['alt']

            if mode == "download":
                # Skip comics that are already recorded in the database.
                sql = "SELECT * FROM EntertainmentDB.ComicName WHERE Name=\"%s\";" % (comic_name)
                if self._EntertainmentSelect(sql):
                    print("%s 已经下载过，请查看数据库" % comic_name)
                    continue

            # Wait until the jobs of the previous comic have been consumed.
            while not priority_queue.empty():
                print("threads conunt :%d" % threading.active_count())
                print("queue size : %d" % (priority_queue.qsize()))
                if threading.active_count() < 10:
                    StartComicThread(10)
                time.sleep(60)

            self.keyword         = comic_name
            print(self.keyword)
            url_keyword_content  = self._url + "/" + data.a['href']
            soup_keyword_content = BaseRequest.GetUrlSoup(url_keyword_content, 'gbk')
            if soup_keyword_content is None:
                return False

            # Collect the chapter links of the comic.
            chapter_box = soup_keyword_content.find('div', {'class': 'plist pnormal', 'id': 'play_0'})
            if chapter_box is None or chapter_box.ul is None:
                # Unexpected page layout: skip this comic instead of crashing.
                print("%s chapter list not found, skipped" % self.keyword)
                continue

            a_book = []
            for data_content in chapter_box.ul:
                a = data_content.find('a')
                # Presumably some children are text nodes whose find() is
                # str.find and yields an int offset (the original compared
                # against -1); keep only real results.
                if a is not None and not isinstance(a, int):
                    a_book.append(a)

            if mode == "download":

                a_author    = soup_keyword_content.find('meta', {'property': 'og:novel:author'})
                a_category  = soup_keyword_content.find('meta', {'property': 'og:novel:category'})
                a_img       = soup_keyword_content.find('meta', {'property': 'og:image'})
                a_introduce = soup_keyword_content.find('p', {'id': 'intro'})
                IsFinish    = soup_keyword_content.find('meta', {'property': 'og:novel:status'})
                # '连载中' marks a comic that is still being serialised.
                a_isfinish  = 0 if IsFinish['content'] == '连载中' else 1

                # Download the cover, trying up to five times.
                if download_path is not None:
                    path = '%s/Comics/%s/' % (download_path, self.keyword)
                    for i in range(5):
                        if not BaseRequest.DownloadData(a_img['content'], path, "封面.jpg"):
                            print("download %s failed %d time" % ("封面.jpg", i))
                        else:
                            print("download %s%s success" % (path, "封面.jpg"))
                            break
                src = "https://txz-1256783950.cos.ap-beijing.myqcloud.com/Comics/" + self.keyword + "/" + "封面.jpg"

                # Store the comic information in the database.
                sql_dict = collections.OrderedDict()
                sql_dict['Name']      = "\"" + self.keyword + "\""          # name
                sql_dict['WatchNum']  = 0                                   # watch counter
                sql_dict['Website']   = "\"" + self._url + "\""             # source site
                sql_dict['ChapterNum']= len(a_book)                         # number of chapters
                sql_dict['IsFinish']  = a_isfinish                          # finished flag
                sql_dict['Introduce'] = "\"" + a_introduce.a.contents[0] + "\""   # introduction
                sql_dict['Author']    = "\"" + a_author['content'] + "\""   # author
                sql_dict['Img']       = "\"" + src + "\""                   # cover image
                sql_dict['Type']      = "\"" + a_category['content'] + "\""             # category
                sql_dict['Time']      = "\"" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\"" # download time

                if not self._EntertainmentInsert('ComicName', sql_dict):
                    print("inster ComicName table failed!")
                    continue

                # Read back the ID of the row that was just inserted.
                sql = "SELECT ID FROM EntertainmentDB.ComicName WHERE Name=\"%s\";" % (comic_name)
                max_id = self._EntertainmentSelect(sql)
                if max_id:
                    self.id = max_id[0][0]
                else:
                    print("get max_id failed!")
                    continue

            elif mode == "update":
                now_Time = "\"" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\"" # update time
                sql = "update EntertainmentDB.ComicName set Time = %s  where ID = %d;" % (now_Time, self.id)
                if not self._EntertainmentUpdate(sql):
                    print("%s update failed!" % (sql))

            # The page lists chapters newest first, so walk them in reverse
            # and number them from 1.
            for count, chapter in enumerate(reversed(a_book), 1):
                href  = chapter['href']
                title = chapter['title']

                # Put the current chapter on the queue.
                url_a_book = self._url + href

                job_data = {"url": url_a_book, "title": title, "href": href, "count": count}
                if mode == "download":
                    dic_queue = {"type": "download", "subtype": "download", "self": self, "data": job_data}
                elif mode == "update":
                    dic_queue = {"type": "download", "subtype": "update", "self": self, "data": job_data}

                priority_queue.put(base.Job(2, dic_queue, self._url))

        return True

Example #2

Show file

File: Comics.py Project: txz19881104/python

    def _GetContentByKeyword(self, keyword, mode, download_path=None):
        """Search the site's JSON API for ``keyword`` and queue every chapter found.

        The search URL is ``<self._url>/web/topic/search?keyword=<keyword>``.
        For each topic in ``data.topic`` of the response the topic page is
        fetched, its chapter links are collected and one ``base.Job`` per
        chapter is put on ``priority_queue``.

        Parameters
        ----------
        keyword : str
            Text to search for.
        mode : str
            "download" : skip comics already present in ComicName, download the
                         cover, insert a ComicName row and read back its ID
                         into ``self.id`` before queueing the chapters.
            "update"   : refresh the ``Time`` column of the row ``self.id`` and
                         queue the chapters.
        download_path : str, optional
            Root directory for downloads. The cover is only downloaded when
            this is given.

        Returns
        -------
        bool
            False when a request returns None or the search response decodes
            to an empty/false JSON value, True otherwise.
        """
        # NOTE(review): every SQL statement below is assembled by string
        # formatting from text returned by the remote site. A title containing
        # a double quote breaks the statement; consider a parameterised helper.

        # Request the search results for the keyword.
        self.keyword       = keyword
        self.download_path = download_path
        # urlencode({"": keyword}) yields "=<quoted keyword>", completing "?keyword".
        url_keyword        = self._url + '/web/topic/search?keyword' + parse.urlencode({"": keyword})
        content_keyword    = BaseRequest.GetUrlContent(url_keyword)
        if content_keyword is None:
            return False

        # Decode the response; null, false and empty payloads are failures.
        content_keyword_json = json.loads(content_keyword.decode("utf8"))
        if not content_keyword_json:
            return False

        # Each topic carries the id used to reach the comic page.
        for data in content_keyword_json['data']['topic']:
            comic_title = data['title']

            if mode == "download":
                # Skip comics that are already recorded in the database.
                sql = "SELECT * FROM EntertainmentDB.ComicName WHERE Name=\"%s\";" % (comic_title)
                if self._EntertainmentSelect(sql):
                    print(comic_title)
                    continue

            # Wait until the jobs of the previous comic have been consumed.
            while not priority_queue.empty():
                print("threads conunt :%d" % threading.active_count())
                # Report the size of the queue that is actually being polled.
                print("queue size : %d" % (priority_queue.qsize()))
                if threading.active_count() < 10:
                    StartComicThread(10)
                time.sleep(60)

            self.keyword         = comic_title
            url_keyword_content  = self._url + '/web/topic/' + str(data['id'])
            soup_keyword_content = BaseRequest.GetUrlSoup(url_keyword_content)
            if soup_keyword_content is None:
                return False

            # Collect the chapter links of the comic.
            a_book = soup_keyword_content.find_all('a', {'class': 'article-img'})

            if mode == "download":

                a_author    = soup_keyword_content.find('div', {'class': 'author-nickname'})
                a_introduce = soup_keyword_content.find('div', {'class': 'switch-content'})
                a_img       = soup_keyword_content.find('img', {'class': 'kk-img'})

                # Download the cover, trying up to five times.
                if download_path is not None:
                    path = '%s/Comics/%s/' % (download_path, self.keyword)
                    for i in range(5):
                        if not BaseRequest.DownloadData(a_img['src'], path, "封面.jpg"):
                            print("download %s failed %d time" % ("封面.jpg", i))
                        else:
                            print("download %s%s success" % (path, "封面.jpg"))
                            break

                cover_src = "https://txz-1256783950.cos.ap-beijing.myqcloud.com/Comics/" + self.keyword + "/" + "封面.jpg"

                # Store the comic information in the database.
                sql_dict = collections.OrderedDict()
                sql_dict['Name']      = "\"" + comic_title + "\""           # name
                sql_dict['WatchNum']  = 0                                   # watch counter
                sql_dict['Website']   = "\"" + self._url + "\""             # source site
                sql_dict['ChapterNum']= len(a_book)                         # number of chapters
                sql_dict['IsFinish']  = 0                                   # finished flag
                # Double quotes are stripped so they cannot end the SQL literal.
                sql_dict['Introduce'] = "\"" + a_introduce.p.contents[0].replace('\"', '') + "\""   # introduction
                sql_dict['Author']    = "\"" + a_author.contents[0] + "\""  # author
                sql_dict['Img']       = "\"" + cover_src + "\""             # cover image
                sql_dict['Type']      = "\"" + self.type + "\""             # category
                sql_dict['Time']      = "\"" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\"" # download time

                if not self._EntertainmentInsert('ComicName', sql_dict):
                    print("inster ComicName table failed!")
                    continue

                # Read back the ID of the row that was just inserted.
                sql = "SELECT ID FROM EntertainmentDB.ComicName WHERE Name=\"%s\";" % (comic_title)
                max_id = self._EntertainmentSelect(sql)
                if max_id:
                    self.id = max_id[0][0]
                else:
                    print("get max_id failed!")
                    continue

            elif mode == "update":
                now_Time = "\"" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\"" # update time
                sql = "update EntertainmentDB.ComicName set Time = %s  where ID = %d;" % (now_Time, self.id)
                if not self._EntertainmentUpdate(sql):
                    print("%s update failed!" % (sql))

            # The page lists chapters newest first, so walk them in reverse
            # and number them from 1.
            for count, chapter in enumerate(reversed(a_book), 1):
                href        = chapter['href']
                title       = chapter['title']
                chapter_img = chapter.img['src']

                # Put the current chapter on the queue.
                url_a_book = self._url + href
                job_data = {"url": url_a_book, "title": title, "src": chapter_img, "href": href, "count": count}
                if mode == "download":
                    dic_queue = {"type": "download", "subtype": "download", "self": self, "data": job_data}
                elif mode == "update":
                    dic_queue = {"type": "download", "subtype": "update", "self": self, "data": job_data}

                priority_queue.put(base.Job(2, dic_queue, self._url))

                # NOTE(review): `p` and `run` are not defined inside this
                # method; presumably a module-level pool and worker - confirm.
                p.spawn(run)

            p.join()

        return True

Example #3

Show file

File: Comics.py Project: txz19881104/python

    def _GetContentByKeyword(self, keyword, mode, download_path=None):
        """Search the site for ``keyword`` and queue a job for every chapter found.

        The search form is posted Big5-encoded to ``<self._url>/search.html``.
        For each hit the comic page is fetched, its metadata and chapter links
        are scraped and one ``base.Job`` per chapter is put on
        ``priority_queue``.

        Parameters
        ----------
        keyword : str
            Text to search for.
        mode : str
            "download" : skip comics already present in ComicName, download the
                         cover, insert a ComicName row and read back its ID
                         into ``self.id`` before queueing the chapters.
            "update"   : refresh the ``Time`` column of the row ``self.id`` and
                         queue the chapters.
        download_path : str, optional
            Root directory for downloads. The cover is only downloaded when
            this is given.

        Returns
        -------
        bool
            False when a page request returns None or the comic page lacks the
            expected metadata cells or chapter table, True otherwise.
        """
        # NOTE(review): every SQL statement below is assembled by string
        # formatting from text scraped off the remote page. A title containing
        # a double quote breaks the statement; consider a parameterised helper.

        # Request the search page for the keyword.
        self.keyword       = keyword
        self.download_path = download_path
        url_keyword        = self._url + '/search.html'

        params = {
            'keyword': keyword.encode('big5', 'strict'),
            'searchtype': 'all',
        }
        params = parse.urlencode(params).encode("big5")

        content_keyword = BaseRequest.PostUrlSoup(url_keyword, params, 'big5')
        if content_keyword is None:
            return False

        a_result = content_keyword.find_all('span', {'class': 'covertxt'})

        # The first sibling after each hit carries the comic title and link.
        for data in a_result:
            data_next_siblings = data.find_next_siblings()

            if mode == "download":
                # Skip comics that are already recorded in the database.
                sql = "SELECT * FROM EntertainmentDB.ComicName WHERE Name=\"%s\";" % (data_next_siblings[0]['title'])
                if self._EntertainmentSelect(sql):
                    print(data_next_siblings[0]['title'])
                    continue

            # Wait until the jobs of the previous comic have been consumed.
            while not priority_queue.empty():
                print("threads conunt :%d" % threading.active_count())
                print("queue size : %d" % (priority_queue.qsize()))
                if threading.active_count() < 10:
                    StartComicThread(10)
                time.sleep(60)

            self.keyword         = data_next_siblings[0]['title']
            print(self.keyword)
            url_keyword_content  = self._url + "/" + data_next_siblings[0]['href']
            soup_keyword_content = BaseRequest.GetUrlSoup(url_keyword_content, 'big5')
            if soup_keyword_content is None:
                return False

            # Start collecting the comic information for the database row.
            sql_dict = collections.OrderedDict()
            sql_dict['Name']      = "\"" + self.keyword + "\""          # name
            sql_dict['WatchNum']  = 0                                   # watch counter
            sql_dict['Website']   = "\"" + self._url + "\""             # source site

            # Cells 1, 3 and 5 are read below, so at least six must exist.
            # An empty result is a list, not None, hence the length test.
            save_content = soup_keyword_content.find_all('td', {'width': 276})
            if save_content is None or len(save_content) < 6:
                return False

            sql_dict['Type']     = "\"" + save_content[1].a.contents[0].strip() + "\""
            sql_dict['Author']   = "\"" + save_content[3].contents[1].strip() + "\""
            # Only the chap9 icon marks the comic as finished; chap1 and any
            # other icon count as unfinished.
            status_icon = save_content[5].contents[4]['src'].strip()
            sql_dict['IsFinish'] = 1 if status_icon == "/image/chap9.gif" else 0

            save_content = soup_keyword_content.find_all('table', {'width': 688, 'cellspacing': "8"})
            if save_content:
                sql_dict['Introduce'] = "\"" + save_content[0].tr.td.contents[0].strip() + "\""
            else:
                sql_dict['Introduce'] = ''

            save_content = soup_keyword_content.find_all('img', {'width': '240', 'height': '320'})
            a_img = ''
            if save_content:
                a_img = self._url + save_content[0]['src']

            # Locate the table holding the chapter links.
            save_content = soup_keyword_content.find_all('table', {'width': '688', 'align': 'center'})
            if not save_content:
                return False

            a_book = []
            for data_content in save_content[0].tbody:
                for data_td in data_content:
                    a = data_td.find('a')
                    # Presumably some children are text nodes whose find() is
                    # str.find and yields an int offset (the original compared
                    # against -1); keep only real results.
                    if a is not None and not isinstance(a, int):
                        a_book.append(a)

            if mode == "download":

                # Download the cover, trying up to five times.
                if download_path is not None:
                    path = '%s/Comics/%s/' % (download_path, self.keyword)
                    for i in range(5):
                        if not BaseRequest.DownloadData(a_img, path, "封面.jpg"):
                            print("download %s failed %d time" % ("封面.jpg", i))
                        else:
                            print("download %s%s success" % (path, "封面.jpg"))
                            break

                src = "https://txz-1256783950.cos.ap-beijing.myqcloud.com/Comics/" + self.keyword + "/" + "封面.jpg"

                # Complete the row and store it in the database.
                sql_dict['Img']       = "\"" + src + "\""
                sql_dict['ChapterNum']= len(a_book)                         # number of chapters
                sql_dict['Time']      = "\"" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\"" # download time

                if not self._EntertainmentInsert('ComicName', sql_dict):
                    print("inster ComicName table failed!")
                    continue

                # Read back the ID of the row that was just inserted.
                sql = "SELECT ID FROM EntertainmentDB.ComicName WHERE Name=\"%s\";" % (self.keyword)
                max_id = self._EntertainmentSelect(sql)
                if max_id:
                    self.id = max_id[0][0]
                else:
                    print("get max_id failed!")
                    continue

            elif mode == "update":
                now_Time = "\"" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\"" # update time
                sql = "update EntertainmentDB.ComicName set Time = %s  where ID = %d;" % (now_Time, self.id)
                if not self._EntertainmentUpdate(sql):
                    print("%s update failed!" % (sql))

            # Chapters are queued in page order and numbered from 1.
            for count, chapter in enumerate(a_book, 1):
                href  = chapter['href']
                title = chapter.contents[0]

                # Put the current chapter on the queue.
                url_a_book = self._url + href

                job_data = {"url": url_a_book, "title": title, "href": href, "count": count}
                if mode == "download":
                    dic_queue = {"type": "download", "subtype": "download", "self": self, "data": job_data}
                elif mode == "update":
                    dic_queue = {"type": "download", "subtype": "update", "self": self, "data": job_data}

                priority_queue.put(base.Job(2, dic_queue, self._url))

        return True

Example #4

Show file

    def _GetContentByKeyword(self, keyword, mode, download_path=None):
        """Search the site for ``keyword`` and queue a job for every chapter found.

        The search URL is
        ``<self._url>/modules/article/soshu.php?searchkey=<keyword>`` with the
        keyword quoted as GBK. The response is either a result table (it
        contains a ``caption`` element) or a single book page. For each book
        the page is fetched and one ``base.Job`` per chapter is put on
        ``priority_queue``.

        Parameters
        ----------
        keyword : str
            Text to search for.
        mode : str
            "download" : skip books already present in tbl_fiction_name,
                         download the cover, insert a tbl_fiction_name row and
                         read back its ID into ``self.id`` before queueing the
                         chapters.
            "update"   : refresh ``add_time`` of the row ``self.id`` and queue
                         the chapters.
        download_path : str, optional
            Root directory for downloads. The cover is only downloaded when
            this is given.

        Returns
        -------
        bool
            False when a page request returns None or a single-book response
            lacks its og:url / og:novel:book_name meta tags, True otherwise.
        """
        # NOTE(review): every SQL statement below is assembled by string
        # formatting from text scraped off the remote page. A title containing
        # a double quote breaks the statement; consider a parameterised helper.

        # Request the search page for the keyword.
        self.keyword = keyword
        self.download_path = download_path

        url_keyword = self._url + '/modules/article/soshu.php?searchkey=' + parse.quote(
            keyword, encoding='gbk', errors='replace')
        content_keyword = BaseRequest.GetUrlSoup(url_keyword, 'gbk')
        if content_keyword is None:
            return False

        # Normalise both response shapes into a list of {"name", "url"}.
        find_result = []
        if content_keyword.find('caption'):
            a_result = content_keyword.find_all('tr', {'id': 'nr'})
            if a_result is None:
                return False

            for row in a_result:
                find_result.append({
                    "name": row.td.a.contents[0],
                    "url": row.td.a['href']
                })
        else:
            a_url = content_keyword.find('meta', {'property': 'og:url'})
            if a_url is None:
                return False

            a_name = content_keyword.find('meta',
                                          {'property': 'og:novel:book_name'})
            if a_name is None:
                return False
            find_result.append({
                "name": a_name["content"],
                "url": a_url['content']
            })

        for result in find_result:
            if mode == "download":
                # Skip books that are already recorded in the database.
                sql = "SELECT * FROM EntertainmentDB.tbl_fiction_name WHERE name=\"%s\";" % (
                    result["name"])
                if self._EntertainmentSelect(sql):
                    print(result["name"])
                    continue

            # The wait, the page fetch and the chapter list are needed by both
            # modes: the queueing loop below reads a_book even in "update"
            # mode, so they must not live inside the "download" branch.

            # Wait until the jobs of the previous book have been consumed.
            while not priority_queue.empty():
                print("threads conunt :%d" % threading.active_count())
                print("queue size : %d" % (priority_queue.qsize()))
                if threading.active_count() < 10:
                    StartFictionThread(10)
                time.sleep(60)

            self.keyword = result["name"]
            soup_keyword_content = BaseRequest.GetUrlSoup(
                result["url"], 'gbk')
            if soup_keyword_content is None:
                return False

            # Collect the chapter entries of the book.
            a_list = soup_keyword_content.find('div', {'id': 'list'})
            a_book = a_list.dl.find_all('dd')

            if mode == "download":
                a_name = soup_keyword_content.find(
                    'meta', {'property': 'og:novel:book_name'})
                a_introduce = soup_keyword_content.find(
                    'meta', {'property': 'og:description'})
                a_image = soup_keyword_content.find('meta',
                                                    {'property': 'og:image'})
                a_category = soup_keyword_content.find(
                    'meta', {'property': 'og:novel:category'})
                a_author = soup_keyword_content.find(
                    'meta', {'property': 'og:novel:author'})
                a_status = soup_keyword_content.find(
                    'meta', {'property': 'og:novel:status'})

                # Download the cover, trying up to five times.
                if download_path is not None:
                    path = '%s/Fiction/%s/' % (download_path, self.keyword)
                    for i in range(5):
                        if not BaseRequest.DownloadData(
                                a_image['content'], path, "封面.jpg"):
                            print("download %s failed %d time" % ("封面.jpg", i))
                        else:
                            print("download %s%s success" % (path, "封面.jpg"))
                            break

                src = "https://txz-1256783950.cos.ap-beijing.myqcloud.com/Fiction/" + self.keyword + "/" + "封面.jpg"

                # Store the book information in the database.
                sql_dict = collections.OrderedDict()
                sql_dict['name'] = "\"" + a_name['content'] + "\""  # name
                sql_dict['watch_count'] = 0  # watch counter
                sql_dict['website'] = "\"" + self._url + "\""  # source site
                sql_dict['chapter_count'] = len(a_book)  # number of chapters
                sql_dict[
                    'introduce'] = "\"" + a_introduce['content'] + "\""  # introduction
                sql_dict['author'] = "\"" + a_author['content'] + "\""  # author
                sql_dict['cover_img_src'] = "\"" + src + "\""  # cover image
                sql_dict['type'] = "\"" + a_category['content'] + "\""  # category
                sql_dict['add_time'] = "\"" + time.strftime(
                    "%Y-%m-%d %H:%M:%S", time.localtime()) + "\""  # download time
                # '连载中' marks a book that is still being serialised.
                if "连载中" in a_status['content']:
                    sql_dict['is_finish'] = 0  # finished flag
                else:
                    sql_dict['is_finish'] = 1

                if not self._EntertainmentInsert('tbl_fiction_name', sql_dict):
                    print("inster tbl_fiction_name table failed!")
                    continue

                # Read back the ID of the row that was just inserted.
                # NOTE(review): this selects column ID while the update below
                # filters on pk_id - confirm against the table schema.
                sql = "SELECT ID FROM EntertainmentDB.tbl_fiction_name WHERE name=\"%s\";" % (
                    a_name['content'])
                max_id = self._EntertainmentSelect(sql)
                if max_id:
                    self.id = max_id[0][0]
                else:
                    print("get max_id failed!")
                    continue

            elif mode == "update":
                now_Time = "\"" + time.strftime("%Y-%m-%d %H:%M:%S",
                                                time.localtime()) + "\""  # update time
                sql = "update EntertainmentDB.tbl_fiction_name set add_time = %s  where pk_id = %d;" % (
                    now_Time, self.id)
                if not self._EntertainmentUpdate(sql):
                    print("%s update failed!" % (sql))

            # Chapters are queued in page order and numbered from 1.
            for count, chapter in enumerate(a_book, 1):

                href = chapter.a['href']
                title = chapter.a.contents[0]

                # Put the current chapter on the queue.
                url_a_book = self._url + href
                data = {
                    "ID": self.id,
                    "url": url_a_book,
                    "title": title,
                    "href": href,
                    "count": count
                }
                if mode == "download":
                    dic_queue = {
                        "type": "download",
                        "subtype": "download",
                        "self": self,
                        "data": data
                    }
                elif mode == "update":
                    dic_queue = {
                        "type": "download",
                        "subtype": "update",
                        "self": self,
                        "data": data
                    }

                priority_queue.put(base.Job(2, dic_queue, self._url))

        return True