Example #1
def get_weibo_comment(weibo_content_id):
    url = URL_COMMIT_URL.replace('id=4184562986557218',
                                 'id=' + weibo_content_id)
    content = request_url(url, 'get', '')
    try:
        result_json = json.loads(content)
        ok_flag = result_json['ok']
        test_comment_pool = ThreadPool(10)
        if ok_flag == 1:
            comment_data_arr = result_json['data']['data']
            get_comment_by_arr(weibo_content_id, comment_data_arr)
            page_count = result_json['data']['max']
            for page_index in range(2, page_count + 1):
                url = url.replace('&page=' + str(page_index - 1),
                                  '&page=' + str(page_index))
                content = request_url(url, 'get', '')
                try:
                    result_json = json.loads(content)
                    ok_flag = result_json['ok']
                    if ok_flag == 1:
                        comment_data_arr = result_json['data']['data']
                        #get_comment_by_arr(weibo_content_id,comment_data_arr)
                        test_comment_pool.run(get_comment_by_arr,
                                              (weibo_content_id,
                                               comment_data_arr),
                                              callback=None)
                except Exception as e:
                    continue
            test_comment_pool.close()
    except Exception as e:
        pass
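The pools in Examples #1, #4, #21 and #22 expose a run(func, args, callback=None) / close() interface. The original class is not shown on this page; a minimal Python 3 sketch of a pool with that interface (an illustration under assumed semantics, not the project's actual implementation) could look like this:

import threading
import queue

class ThreadPool:
    """Minimal sketch only -- assumes close() drains the queue and stops workers."""

    def __init__(self, num_workers):
        self._jobs = queue.Queue()
        self._workers = [threading.Thread(target=self._work, daemon=True)
                         for _ in range(num_workers)]
        for w in self._workers:
            w.start()

    def _work(self):
        while True:
            item = self._jobs.get()
            if item is None:  # poison pill: this worker should exit
                self._jobs.task_done()
                break
            func, args, callback = item
            try:
                result = func(*args)
                if callback is not None:
                    callback(result)
            except Exception:
                pass  # a real pool would log the failure
            finally:
                self._jobs.task_done()

    def run(self, func, args, callback=None):
        self._jobs.put((func, args, callback))

    def close(self):
        for _ in self._workers:
            self._jobs.put(None)  # one pill per worker
        self._jobs.join()  # block until every queued job has been processed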
Example #2
 def scrap(self,url):
     if url is not None and len(url) > 0:
         url_info = tldextract.extract(url)
         self.domain = url_info.domain
         print '[INFO] Scrapper::scrap, domain',self.domain
         tp = ThreadPool(max_workers=120)
         tp.add_job(threaded_function,[self.domain,url])            
     else:
         print '[ALARM] Scrapper:scrap, invalid url'
Example #3
 def __init__(self,
              crawlername,
              workerThreadNum,
              pollInterval=0.5,
              pollTimeout=None,
              downloadTimeout=5):
     self.threadPool = ThreadPool(workerThreadNum)
     self.crawlername = crawlername
     self.pollTimeout = pollTimeout
     self.crawlerThread = CrawlerThread(self.threadPool, pollTimeout)
     self.mp3Downloader = MP3Downloader(downloadTimeout)
Example #4
def siper_data(user_id, start_date, end_date):
    main_pool = ThreadPool(5)
    start_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_date = datetime.strptime(end_date, '%Y-%m-%d')
    # collect the Weibo user's profile info
    get_user_info(user_id)
    # read the Weibo container ID
    container_id = get_containerid(user_id)
    # collect the Weibo post links: timestamps and IDs
    weibo_link_dict = get_weibo_link(user_id, container_id, start_date,
                                     end_date)
    main_pool.run(siper_weibo_content, (weibo_link_dict, ), callback=None)
    main_pool.run(siper_weibo_comment, (weibo_link_dict, ), callback=None)
Example #5
    def scan(self):
        if self.source:
            threadpool = ThreadPool.ThreadPool(1)
        else:
            threadpool = ThreadPool.ThreadPool(5)

        for rule in self.rules:
            threadpool.addtask(rule.detect, (), (self.results, rule))
            #if rule.detect():
            #    self.results.append(rule)
        threadpool.start()
        threadpool.wait()
        #threadpool.clear()
        #threadpool.stop()
Example #6
 def process_request(self, request, client_address):
     """
     Build a task and submit it to the thread pool for execution.
     """
     work = ThreadPool.WorkRequest(super().process_request,
                                   args=(request, client_address))
     self.threadPool.putRequest(work)
Example #8
 def __init__(self, parent):
     super().__init__(parent)
     self.fixedSprites = list(
         range(0, 8)
     )  # eight fixed top arrows; each arrow sprite is stored at its arrow index
     self.arrowSprites = []
     self.levelFeedbackSprites = [
         None
     ] * 2  # level feedback of two players, [0] -> p1, [1] -> p2
     self.setFocusPolicy(Qt.StrongFocus)
     self.threadPool = ThreadPool(20)
     self.spritePrototypeFactory = SpritePrototypeFactory()
     self.spritePrototypeFactory.prepare(MyGameView.GAME_WIDTH,
                                         MyGameView.GAME_HEIGHT)
     self.createAllTopFixedArrow()
     self.scoreRecordersMap = defaultdict(ScoreRecorder)
Example #9
def unzip_zip_file_async(zipfilename, unziptodir, end_cb=None):
    if not os.path.exists(unziptodir):
        os.makedirs(unziptodir)

    def unzip(zipfilename, unziptodir, end_cb=None):
        zfobj = zipfile.ZipFile(zipfilename, 'r')
        for name in zfobj.namelist():
            name = name.replace('\\', '/')
            if name.endswith('/'):
                os.makedirs(os.path.join(unziptodir, name))
            else:
                ext_filename = os.path.join(unziptodir, name)
                ext_dir = os.path.dirname(ext_filename)

                if not os.path.exists(ext_dir):
                    os.makedirs(ext_dir)

                data = zfobj.read(name)
                with open(ext_filename, 'wb') as outfile:
                    outfile.write(data)
        if end_cb is not None:
            end_cb(zipfilename, zfobj.namelist())

    t = ThreadPool.ThreadPool().Thread(target=unzip,
                                       args=(zipfilename, unziptodir, end_cb))
    t.start()
    return t
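A hypothetical call site for unzip_zip_file_async (archive name, destination and callback are placeholders, not from the original project):

def on_unzipped(zip_path, names):
    # runs on the worker thread once extraction finishes
    print('extracted %d entries from %s' % (len(names), zip_path))

t = unzip_zip_file_async('data.zip', './extracted', end_cb=on_unzipped)
t.join()  # optional: block until the background extraction completes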
Example #10
def main():
    global thread_pool, store
    global headers, host, user_id
    user_id, thread_count, db_name = option_parser()
    store = Store(db_name)
    thread_pool = ThreadPool.Thread_Pool(thread_count)
    host = "http://blog.csdn.net"
    headers = {
        "User-Agent":
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36"
            " (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
    }

    front_page_url = host + "/" + user_id
    param = {
        "url": front_page_url
    }
    # Enqueue a job to download the blog's front page; it will in turn parse
    # and download every index page, and from those every post. The queue is
    # empty only once all download jobs have finished (it stays non-empty
    # while jobs are in flight), so waiting for it to drain synchronizes the
    # threads.
    thread_pool.add_work(Download_Front_Page, param)
    thread_pool.wait_queue_empty()

    store.store()
    print("---end---")
Example #11
 def preprocessPercentileRatios(self):
     print "preprocessPercentileRatios start"
     distributionsFile = self.getDatasetSlidingSizesFile()
     if os.path.isfile(distributionsFile):
         #The distributions file has already been processed
         print "The distribution file exists"
         return
     self.initializePropertiesComputeStructures(False)
     
     print "computing the ratios"
     try:
         zpa = zipfile.ZipFile(distributionsFile,"w",zipfile.ZIP_DEFLATED)
         zpa.writestr("dummy.txt","dummy file")
         zpa.close()
         self.ziplock = threading.Lock()
         # Create a pool with three worker threads
         pool = ThreadPool.ThreadPool(5)  
         sys.setcheckinterval(1000)          
         for windowSize in self.slidingWindowSizes:
             if self.useWindowThreading:
                 pool.queueTask(self.preprocessWindowSize, windowSize, None)
             else:
                 self.preprocessWindowSize(windowSize)
         # When all tasks are finished, allow the threads to terminate
         if self.useWindowThreading:
             print "Joining the threads"
             pool.joinAll()             
     except:            
         os.unlink(distributionsFile)
         raise
     print "preprocessPercentileRatios end"
Example #12
 def __init__(self, store_location):
     #logging.debug("OFFLINE IMAGE CACHE STARTUP")
     self._store_location = store_location
     self._threadpool = ThreadPool.ThreadPool(5,
                                              "OfflineImageCache",
                                              daemon=True)
     self._cachers = {}  # dict of urlcacher objects?
Example #13
    def __init__(self, filepath):
        with open(filepath, 'r', encoding='utf-8') as f:  # parse the config file
            self.config = json.loads(f.read())
        self.max_Task = self.config['max_Task']  # maximum number of tasks
        self.thread_count = self.config['threadCount']  # number of threads
        self.M_BLL = Mission_BLL.Mission()  # mission table
        self.SI_BLL = SpiderInfo_BLL.SpiderInfo()  # spider config table
        self.HI_BLL = HoneyInfo_BLL.HoneyInfo()  # database info
        self.EI_BLL = ExceptionInfo_BLL.ExceptionInfo()  # exception info
        self.Exception_Model = Model.exceptionInfo("", "")  # exception model
        self.PRI = ["high", "common", "low"]  # priorities
        self.T_Pool = ThreadPool(self.thread_count, self.max_Task)  # task thread pool

        # logging setup
        self.loger = logging.getLogger()
        self.loger.setLevel(logging.DEBUG)
        hfile = logging.FileHandler("./Hive/LOG/ServerLogInfo.log")
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        hfile.setFormatter(formatter)
        self.loger.addHandler(hfile)
        self.loger.info("Server initialized!")
Example #14
def process_jobs(num_threads, job_queue):
    """ Process the jobs in parallel """

    # Create the thread pool
    tp = ThreadPool.ThreadPool(num_threads)

    # Process the jobs
    job_queue = tp.process_jobs(job_queue)

    for i in range(len(job_queue)):
        job = job_queue[i]
        print(job.thread.thread_id, job.start_time)
Example #15
 def __init__(self):
     self.countPosition = 0
     self.threadPool = ThreadPool.ThreadPool(1000)
     self.diffTime = 1114
     self.deviationTime = 200
     self.timer = QBasicTimer()
     self.__noteMap = collections.defaultdict(list)  # <startTime, note[]>
     self.__player = None
     self.__musicPlayerView = None
     self.sheetRecorder = None
     self.scoreRecorder = None
     self.sheet = None
Example #16
    def __init__(self):
        self.fairness_factor = 0.2
        self.fairness_scores = {}
        self.share_scores = {}
        self.dag_planners = {}
        self.alpha = 0.5
        self.available_bandwidth = 1200  # 10 Gbps ≈ 1.2 GB/s = 1200 MB/s
        self.cache_block_size = 1  # 1 MB
        self.cache = cache.Cache(10000)

        self.n_thp = 10
        self.d3n_conn = thp.ThreadPool(self.n_thp)
Example #17
    def __init__(self, sizeOfBuffer, numberOfCores, numberOfThreads,
                 timeQuantum, contextSwitchTime):
        Buffer.Buffer.initBufferCount()
        self.buffer = Buffer.Buffer(sizeOfBuffer)
        self.numberOfCores = numberOfCores
        self.cores = []

        for y in list(range(numberOfCores)):
            self.cores.append(Core.Core(y, 0))

        self.threadPool = ThreadPool.ThreadPool(numberOfThreads)
        self.timeQuantum = timeQuantum
        self.contextSwitchTime = contextSwitchTime
Example #18
class MP3Crawler:
    '''
    @param crawlername
    @param workerThreadNum
    @param pollInterval: interval time to poll task from task queue
    @param pollTimeout: timeout seconds to poll a task from task queue
    @param downloadTimeout: timeout seconds to download media from web
    '''
    def __init__(self,crawlername,workerThreadNum,
        pollInterval=0.5,pollTimeout=None,downloadTimeout=5):
        self.threadPool = ThreadPool(workerThreadNum)
        self.crawlername = crawlername
        self.pollTimeout = pollTimeout 
        self.crawlerThread = CrawlerThread(self.threadPool,pollTimeout)
        self.mp3Downloader = MP3Downloader(downloadTimeout)

    def start(self):
        '''start crawl'''
        self.crawlerThread.start()

    def stop(self):
        '''stop crawl, block until all tasks finish'''
        self.threadPool.stop()
        self.crawlerThread.dismiss()
        self.crawlerThread.join()

    def waitUtilComplete(self):
        '''wait until all tasks complete'''
        self.threadPool.wait()

    def __checkTask(self, task):
        if not task.has_key('type') or not task.has_key('url'):
            return False
        if task['type'] not in ('mp3', 'html', 'json'):
            return False
        return task.has_key('savePath')
    
    def downloadMP3(self,url,filePath):
        self.mp3Downloader.downloadM(url, filePath)


    def __printResult(self,request,result):
        print "---Result from request %s : %r" % (request.requestID,result)
        #pass

    def addTask(self,task):
        '''add an MP3 download task
        '''
        if not self.__checkTask(task):
            print 'Task not Available:', task
            return
        req = WorkRequest(self.downloadMP3,args=[task['url'],task['savePath']],kwds={},callback=self.__printResult)
        self.threadPool.putRequest(req)
        print "work request #%s added." % req.requestID
Example #19
	def __init__(self, view, home, share_path):
		PTVhtml.PTVhtml.__init__(self, view, home, share_path)
		self._htmlview = None
		self._document_lock = threading.Lock()
		self._image_cache = SimpleImageCache.SimpleImageCache()
		self._css = ""
		self._last_link_time = 0
		
		self._view = view
		
		f = open(os.path.join(share_path, "gtkhtml.css"))
		for l in f.readlines(): self._css += l
		f.close()
		self._image_pool = ThreadPool.ThreadPool(5, "PlanetView")
		self._dl_total = 0
		self._dl_count = 0
Example #20
    def __init__(self,
                 app,
                 media_dir,
                 progress_callback=None,
                 finished_callback=None):
        self.index = 0
        #should this be lucene compatible?
        if utils.RUNNING_HILDON:
            max_downloads = 1
        else:
            max_downloads = 5
        self._style = BYDATE
        self.pool = ThreadPool.ThreadPool(max_downloads, "MediaManager")
        self.downloads = []
        self.db = app.db
        self.time_appendix = 0
        self.bt_settings = {'min_port': 6881, 'max_port': 6999, 'ul_limit': 0}
        self.id_time = 0
        self.quitting = False
        self._net_connected = True
        self.pause_state = RUNNING
        if finished_callback:
            self.app_callback_finished = finished_callback
        else:
            self.app_callback_finished = self._basic_finished_callback

        if progress_callback:
            self.app_callback_progress = progress_callback
        else:
            self.app_callback_progress = self._basic_progress_callback
        home = self.db.home

        if media_dir[0] == '~':
            media_dir = os.getenv('HOME') + media_dir[1:]

        try:
            os.stat(media_dir)
        except:
            try:
                os.mkdir(media_dir)
            except:
                raise NoDir, "error creating " + media_dir
        self._media_dir = media_dir

        app.connect('online-status-changed', self.__online_status_changed)
        app.connect('new-database', self.__new_database_cb)
Example #21
def siper_weibo_comment(weibo_link_dict):
    comment_pool = ThreadPool(10)
    print 'weibo comment siper start ...'
    for weibo_content_id in weibo_link_dict:
        created_at = weibo_link_dict[weibo_content_id]
        print weibo_content_id, created_at
        comment_pool.run(get_weibo_comment, (weibo_content_id, ),
                         callback=None)
        # parse the Weibo comments
        #get_weibo_comment(weibo_content_id)
    print 'weibo comment siper end ...'
    comment_pool.close()
Example #22
def siper_weibo_content(weibo_link_dict):
    content_pool = ThreadPool(10)
    print 'weibo content siper start ....'
    # collect the Weibo post content
    for weibo_content_id in weibo_link_dict:
        created_at = weibo_link_dict[weibo_content_id]
        content_pool.run(get_weibo_content,
                         (user_id, weibo_content_id, created_at),
                         callback=None)
        # collect the Weibo post content
        #get_weibo_content(user_id,weibo_content_id,created_at)
    print 'weibo content siper end ....'
    content_pool.close()
Example #23
def unzip_7z_file_async(zipfilename, unziptodir, end_cb=None):
    if not os.path.exists(unziptodir):
        os.makedirs(unziptodir)

    def unzip(zipfilename, unziptodir, end_cb=None):
        try:
            archive = py7zr.SevenZipFile(zipfilename, mode='r')
            names = archive.getnames()
            archive.extractall(path=unziptodir)
            archive.close()
            if end_cb is not None:
                end_cb(zipfilename, names)
        except Exception as e:
            print(e)

    t = ThreadPool.ThreadPool().Thread(target=unzip,
                                       args=(zipfilename, unziptodir, end_cb))
    t.start()
    return t
Example #24
def getRemoteFileByUrlAsync(url, path, progress_cb=None, end_cb=None):
    file_size = 0
    try:
        file_size = int(urlopen(url).info().get('Content-Length', -1))
    except Exception as e:
        print(e)
        return None

    def getRemoteFunc(url, path, file_size, progress_cb=None, end_cb=None):
        try:
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))

            pbar = tqdm(total=file_size,
                        initial=0,
                        desc=path,
                        unit='B',
                        unit_scale=True)
            r = requests.get(url, stream=True, verify=False)
            curSize = 0.0
            chunksize = 1024 * 1024 * 4
            with open(path, "wb") as f:
                for chunk in r.iter_content(chunk_size=chunksize):
                    f.write(chunk)
                    curSize = curSize + len(chunk)

                    if None != progress_cb:
                        progress_cb(curSize, file_size)
                    pbar.update(chunksize)
                f.close()
                pbar.close()
                if None != end_cb:
                    end_cb(path, True)
        except Exception as e:
            print(e)
            if None != end_cb:
                end_cb(path, False)

    t = ThreadPool.ThreadPool().Thread(target=getRemoteFunc,
                                       args=(url, path, file_size, progress_cb,
                                             end_cb))
    t.start()
    return t
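A hypothetical call site for getRemoteFileByUrlAsync (the URL and destination are placeholders):

def on_progress(done_bytes, total_bytes):
    pass  # e.g. update a UI progress bar

def on_end(path, ok):
    print(path, 'done' if ok else 'failed')

t = getRemoteFileByUrlAsync('https://example.com/big.bin', './dl/big.bin',
                            progress_cb=on_progress, end_cb=on_end)
if t is not None:  # None means the Content-Length probe failed
    t.join()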
Example #25
def siper_weibo_comment(weibo_link_dict, user_id):
    comment_pool = ThreadPool(MAX_THREAD)
    print 'weibo comment siper start ...'
    for weibo_content_id in weibo_link_dict:
        created_at = weibo_link_dict[weibo_content_id]
        print weibo_content_id, created_at
        #get_weibo_comment_spider(weibo_content_id,user_id)
        comment_pool.run(get_weibo_comment_spider, (
            weibo_content_id,
            user_id,
        ),
                         callback=None)
        # parse the Weibo comments
        #get_weibo_comment(weibo_content_id)
    print 'weibo comment siper end ...'
    comment_pool.close()
Example #26
class WorkThread(QtCore.QThread):
    signal_str = QtCore.pyqtSignal(str)
    scan_thread_pool = ThreadPool.ThreadPoolManager(thread_num)  # thread pool

    def __init__(self, scan_range, result):
        super(WorkThread, self).__init__()
        self.scan_range = scan_range
        self.result = result

    def run(self):
        print('WorkThread is running')
        startIp = self.scan_range[0].split('.')
        endIp = self.scan_range[1].split('.')
        startPort = int(self.scan_range[2])
        endPort = int(self.scan_range[3])
        # for i in range(len(startIp)):
        #     startIp[i] = int(startIp[i])
        # for i in range(len(endIp)):
        #     endIp[i] = int(endIp[i])

        if startIp[0] != endIp[0] or startIp[1] != endIp[1]:
            self.signal_str.emit('Scan range too large; refusing to scan')
            return
        # known assumption: startIp <= endIp and startPort <= endPort
        s_ip2 = int(startIp[2])
        s_ip3 = int(startIp[3])
        e_ip2 = int(endIp[2])
        e_ip3 = int(endIp[3])

        for i in range(startPort, endPort + 1):
            for j in range(s_ip3, e_ip3 + 1):
                for k in range(s_ip2, e_ip2 + 1):
                    if not stopTag:
                        dst_ip = startIp[0] + '.' + startIp[1] + '.' + str(
                            k) + '.' + str(j)  # target IP, as a str
                        dst_port = i  # target port, as an int
                        # scan_info = (dst_ip, dst_port, open_port)
                        WorkThread.scan_thread_pool.add_job(
                            scan, dst_ip, dst_port, self.result)
        print('WorkThread is exited')
Example #27
    def __init__(self,
                 server_address,
                 RequestHandlerClass,
                 bind_and_activate=True,
                 thread_num=10,
                 RequestInterceptorClasses=[]):
        """Constructor.  
            thread_num 线程池默认的线程数量
            
            """
        self.requestInterceptorClasses = RequestInterceptorClasses
        #initialize the thread pool
        self.threadPool = ThreadPool.ThreadPool(thread_num)
        #Set up the urllib opener: when the HTTP server receives a request it
        #forwards the request via urllib, so configure urllib globally here
        cj = http.cookiejar.CookieJar()
        #proxyHandler = urllib.request.ProxyHandler({'http','127.0.0.1:8888'})  # proxy
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cj), KIZHTTPRedirectHandler,
            KIZHttpErrorHandler)
        urllib.request.install_opener(opener)

        super().__init__(server_address, RequestHandlerClass,
                         bind_and_activate)
Example #28
def spider_page(url, fid):
    content = request_url(url, 'get', '')
    soup = BeautifulSoup(content, 'lxml')
    tbody_list = soup.find('table', id='threadlisttableid').find_all('tbody')
    main_pool = ThreadPool(MAX_THREAD)
    for tbody in tbody_list:
        id = tbody.get('id')
        id = id.replace('separatorline',
                        '').replace('stickthread',
                                    '').replace('normalthread',
                                                '').replace('_', '')
        if id:
            content_url = 'https://bbs.ichunqiu.com/thread-' + str(
                id) + '-1-1.html'
            #ichunqiu_sipder(content_url,fid)
            main_pool.run(ichunqiu_sipder, (
                content_url,
                fid,
            ), callback=None)
    main_pool.close()
Example #29
def get_weibo_comment_spider(content_id, user_id):
    url = 'https://weibo.com/aj/v6/comment/big?id=' + content_id + '&filter=all&page=1'
    try:
        page_count = get_weibo_comment_ext(url, content_id, user_id)
        comment_pool = ThreadPool(MAX_THREAD)
        for i in range(2, page_count + 1):
            url = 'https://weibo.com/aj/v6/comment/big?id=' + content_id + '&filter=all&page=' + str(
                i)
            try:
                #get_weibo_comment_ext(url,content_id,user_id)
                comment_pool.run(get_weibo_comment_ext,
                                 (url, content_id, user_id),
                                 callback=None)
            except Exception as e1:
                print e1
                traceback.print_exc()
                continue
        comment_pool.close()
    except Exception as e:
        traceback.print_exc()
Example #30
import tkinter as tk
import tkinter.messagebox
import socket
import time
import threading
import queue
import sys
from Encryption import encryptPasswd, encodeId, decodeId, pad, readMessage, readRoomList, readUserList
from Settings import HOST, PORT, COMMAND_CODE
import ThreadPool

client_thread_pool = ThreadPool.ThreadPool(3, True)
MessageQueue = queue.Queue()
RoomsList = list()
# True for Rooms List HAVE BEEN Read, False for not
RoomsListFlag = True
UsersList = list()
# True for Users List HAVE BEEN Read, False for not
UsersListFlag = True
ReturnCode = 0
# True for message HAVE BEEN Read, False for HAVE NOT BEEN Read
ReturnCodeFlag = True
ChatRooms = dict()


def clientReceiveLogic(conn):
    global RoomsListFlag, RoomsList, ReturnCode, ReturnCodeFlag, loginPage
    conn = conn[0]
    while True:
        data = MessageQueue.get()
        Command = int.from_bytes(data[0:2], byteorder='big')
Example #31

#python computeFVs.py videos vid_in vid_out
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("vid_path", help="Directory of the input videos", type=str)
    parser.add_argument("vid_in", help="list of input videos in .txt file", type=str)
    parser.add_argument("output_dir", help="output directory to save FVs (.fisher files)", type=str)
    parser.add_argument("gmm_list", help="File of saved list of GMMs", type=str)

    args = parser.parse_args()

    f = open(args.vid_in, 'r')
    input_videos = f.readlines()
    f.close()
    input_videos = [line.split()[0].split('/')[-1] for line in [video.rstrip() for video in input_videos]]

    ###Just to prevent overwriting already processed vids
    completed_vids = [filename.split('.')[0] for filename in os.listdir(args.output_dir) if filename.endswith('.npz')]
    overlap = [vid for vid in input_videos if vid.split('.')[0] in completed_vids]



    #Multi-threaded FV construction.
    numThreads = 10
    pool = ThreadPool.ThreadPool(numThreads)
    for vid in input_videos:
        if vid not in overlap:
            pool.add_task(processVideo,vid,args.vid_path,args.output_dir,args.gmm_list)
    pool.wait_completion()
Example #32
    return asyncDecorator

if __name__ == "__main__":
    from time import sleep
    from ThreadPool import *
    try:
        xrange
    except NameError:
        xrange = range
    class TestClass():
        @Async()
        def testDecorated(self):
            print (345)
    testClass = TestClass()
    testClass.testDecorated()
    @Async(executor=ThreadPool(5))
    def func(a, b):
        print ("func called")
        sleep(1)
        print ("func exit:" + str(a))

    @Async()
    def funcWithoutExecutor(a):
        print (a)

    for x in xrange(1, 10):
        funcWithoutExecutor("noExecutor:" + str(x))


    for x in xrange(1, 15):
        func(x, 2)
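Example #32 shows only the tail of the Async decorator (return asyncDecorator). A minimal sketch of what such a decorator could look like, assuming the executor exposes a run(func, args)-style method (an assumption; the real signature is not shown):

import threading

def Async(executor=None):
    def asyncDecorator(func):
        def wrapper(*args, **kwargs):
            if executor is not None:
                # assumed executor API -- adapt to the real ThreadPool's method
                executor.run(func, args)
            else:
                threading.Thread(target=func, args=args,
                                 kwargs=kwargs).start()
        return wrapper
    return asyncDecorator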
Example #33
            author = string_tag.find('a',class_='xi2').string
            break
    for date_div in soup.find_all('div',class_='cl',attrs={'style':'font-size: 12px; color: #888888;'}):
        tmp_str = str(date_div)
        date_arr = re.findall(r'\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}', tmp_str)
        if len(date_arr) > 0:
            content_date = date_arr[0]
            break
    content_id = MySQLdb.escape_string(content_id)
    title = MySQLdb.escape_string(title)
    author = MySQLdb.escape_string(author)
    content_date = MySQLdb.escape_string(content_date)
    connection = pool.connection()
    cursor = connection.cursor()
    sql = "INSERT IGNORE INTO ichunqiu_content(id,content_id,title,url,author,content_date,create_date,update_date) VALUES (DEFAULT,'"+content_id+"','"+title+"','"+url+"','"+author+"','"+content_date+"',NOW(),NOW())"
    #print sql
    cursor.execute(sql)
    connection.commit()
    cursor.close()
    connection.close()
    print 'content_id -->',content_id,' ok'

if __name__ == '__main__':
    main_pool = ThreadPool(MAX_THREAD)
    for i in range(33742,34832+1):
        url = 'https://bbs.ichunqiu.com/thread-'+str(i)+'-1-1.html'
        #ichunqiu_sipder(url)
        main_pool.run(ichunqiu_sipder,(url,), callback=None)
        #break
       # print url
    main_pool.close()
Example #34
from ThreadPool import *

def tester(num):
    print("FXXXXXK %d" % (num, ))

def tester1():
    print("FXXXXXK")

if __name__ == '__main__':
    thread_pool = ThreadPool(10)
    for i in range(1000):
        thread_pool.append_job(tester, i)

    for i in range(1000):
        thread_pool.append_job(tester1)

    thread_pool.start()
    thread_pool.join()
Example #35
def test():
    import random
    import time
    import datetime

    def do_work(data):
        time.sleep(random.randint(1, 3))
        res = str(datetime.datetime.now()) + " " + str(data)
        return res

    def print_result(request, result):
        print "---Result from request %s : %r" % (request.requestID, result)

    main = ThreadPool(3)
    for i in range(40):
        req = WorkRequest(do_work, args=[i], kwds={}, callback=print_result)
        main.putRequest(req)
        print "work request #%s added." % req.requestID

    print '-' * 20, main.workersize(), '-' * 20

    counter = 0
    while True:
        try:
            time.sleep(0.5)
            main.poll()
            if (counter == 5):
                print "Add 3 more workers threads"
                main.createWorkers(3)
                print '-' * 20, main.workersize(), '-' * 20
            if (counter == 10):
                print "dismiss 2 workers threads"
                main.dismissWorkers(2)
                print '-' * 20, main.workersize(), '-' * 20
            counter += 1
        except NoResultsPending:
            print "no pending results"
            break

    main.stop()
    print "Stop"
Example #36
#!/usr/bin/python

import Queue

from ThreadPool import *
from util import *

hosts = get_hosts()
queue = Queue.Queue()
tp = ThreadPool(queue, len(hosts))

for hostname in hosts:
    tp.add_job(read_result, hostname)

tp.wait_for_complete()
Example #37
#! /usr/bin/env python

from CULCrawler import CULCrawler
from ThreadPool import *
from Config import config
import urllib
from Daemon import *

if __name__ == "__main__":
	#createDaemon()
	crawler = CULCrawler()
	threadNum = 5
	pool = ThreadPool(threadNum)
	crawl_page_num = int(config.get_config('crawl_pages'))
	for i in xrange(crawl_page_num):
		url = 'http://www.citeulike.org/home/page/' + str(i + 1)
		pool.queueTask(crawler.crawl, url)
	# keywords search
	f = open("keywords", "r")
	for keyword in f.readlines():
		keyword = keyword.strip()
		query = urllib.urlencode({'q' : keyword})
		url_prefix = 'http://www.citeulike.org/search/all/page/'
		for i in xrange(crawl_page_num):
			url = url_prefix + str(i + 1) + '?' + query
			#print url
			pool.queueTask(crawler.crawl, url)
	f.close()
	pool.joinAll()