Example #1
0
 def compress_stuff(self):
     """Queue every valid URL from the ``unprocessed`` text widget for processing.

     Does nothing when a run is already in progress (``is_compressinating``).
     Otherwise each line of the widget text is cleaned up (trailing slashes
     and ``#`` comments removed) and checked against ``url_regex``; matching
     lines are handed to ``process_url`` on a pool of 8 worker threads,
     together with ``updatedConfig``.  Before the work is submitted the
     progress counters are reset, the popup is shown and ``update_status``
     is scheduled 15 times per second.  ``sites_compressed`` is the pool's
     success callback.
     """
     if self.is_compressinating:
         return
     self.is_compressinating = True

     def report_failure(err):
         # Runs in the pool's result thread when a worker raised.
         printlog('URL processing error: ' + repr(err))

     to_process = self.root.ids.unprocessed.text
     parsed_list = []
     for line in to_process.splitlines():
         # startswith() is safe on blank lines, where line[0] would raise
         # IndexError.
         if line.startswith('#'):
             continue
         parsed = re.sub(r'[\n\/]+$', '', line)
         parsed = re.sub(r'#.*$', '', parsed)
         if not parsed:
             continue
         if re.match(url_regex, parsed) is not None:
             parsed_list.append(parsed)
         else:
             printlog(
                 'It looks like you entered a malformed URL. I am gonna ignore it '
                 + parsed)
     self.compressed_so_far = 0
     self.total_to_compress = len(parsed_list)
     self.show_popup()
     self.clock_interval = Clock.schedule_interval(
         self.update_status, 1 / 15)
     # NOTE(review): starmap_async never fires its callback for an empty
     # iterable, so with no valid URLs sites_compressed is not called --
     # confirm that is_compressinating is reset elsewhere in that case.
     pool = ThreadPool(8)
     try:
         pool.starmap_async(process_url,
                            zip(parsed_list,
                                itertools.repeat(updatedConfig)),
                            callback=self.sites_compressed,
                            error_callback=report_failure)
     except Exception as err:
         printlog('URL processing error: ' + repr(err))
     finally:
         # Nothing else is submitted to this pool; close() lets the worker
         # threads exit once the queued URLs are done instead of leaking
         # eight idle threads per call.
         pool.close()
Example #2
0
class ParallelSim(object):
    """Multi-process ``starmap`` helper with progress reporting.

    Usage::

        pl = ParallelSim()
        pl.add(yourFunc, yourIter)
        pl.run()
        data = list(pl.get_results())

    Results are collected in completion order of the submitted chunks, which
    is not necessarily the order of the input.
    """

    def __init__(self, processes=cpu_count()):
        """Create the worker pool.

        Args:
            processes: number of worker processes; defaults to the CPU count.
        """
        self.pool = Pool(processes=processes)
        self.total_processes = 0
        self.completed_processes = 0
        self.results = []
        self.data = None
        # Number of CPU cores used to split the work; fall back to the
        # machine's CPU count when ``processes`` is None.
        self.cores = processes if processes else cpu_count()

    def add(self, func, iter):
        """Run ``func(*args)`` for every argument tuple in *iter* and wait.

        A list is split into up to ``self.cores`` chunks that are submitted
        separately; any other iterable is submitted as one chunk.  The call
        returns once every submitted chunk has finished and re-raises the
        first exception raised by a worker.

        Args:
            func: callable executed in the worker processes.
            iter: iterable of argument tuples for ``func``.
        """
        if isinstance(iter, list) and self.cores > 1:
            chunk_len = len(iter) // self.cores + 1
            # Stepping over the list yields non-empty chunks only.  An empty
            # chunk must not be submitted: its callback never fires, so the
            # progress would never reach 100 %.
            chunks = [iter[start:start + chunk_len]
                      for start in range(0, len(iter), chunk_len)]
        else:
            items = list(iter)
            chunks = [items] if items else []

        # Register the whole batch before submitting so a fast worker cannot
        # report progress against a partial total.
        self.total_processes += len(chunks)
        pending = []
        for chunk in chunks:
            self.data = self.pool.starmap_async(func,
                                                chunk,
                                                callback=self.complete)
            pending.append(self.data)
        # Wait for every chunk, not just the one submitted last.
        for job in pending:
            job.get()

    def complete(self, result):
        """Pool callback: store one chunk's results and print the progress."""
        self.results.extend(result)
        self.completed_processes += 1
        print('Progress: {:.2f}%'.format(
            (self.completed_processes / self.total_processes) * 100))

    def run(self):
        """Close the pool and wait for all worker processes to exit."""
        self.pool.close()
        self.pool.join()

    def get_results(self):
        """Return the list of results collected so far."""
        return self.results
Example #3
0
def CheckDirs(dir, clientname):
    """Run ``CheckCallSite`` on every ``callsite_*`` subdirectory of *dir*.

    Each call receives the absolute callsite path, one ``verified`` set that
    is shared by all calls, and *clientname*.  The calls run on a thread pool
    sized to half the CPU count (at least one thread).

    Args:
        dir: existing directory that contains the ``callsite_*`` folders.
        clientname: passed through unchanged to ``CheckCallSite``.

    Returns:
        True when all calls finished, False when the module-level
        ``early_stop`` flag was set before or while waiting.
    """
    global early_stop
    assert (os.path.isdir(dir))
    verified = set()
    callsites = [
        os.path.abspath(os.path.join(dir, entry))
        for entry in os.listdir(dir)
        if entry.startswith("callsite_")
        and os.path.isdir(os.path.join(dir, entry))
    ]
    innput_array = [[callsite, verified, clientname]
                    for callsite in callsites]

    # max(1, ...) keeps single-core machines working: ThreadPool(0) raises.
    pool = ThreadPool(max(1, cpu_count() // 2))
    try:
        results = pool.starmap_async(CheckCallSite, innput_array)
        # Sleep inside wait() between checks instead of spinning on ready(),
        # which kept one core busy for the whole run.
        while not early_stop and not results.ready():
            results.wait(0.05)
    finally:
        # No more work is submitted; let the worker threads exit when the
        # queued calls are done.  Calls already queued keep running, as they
        # did before.
        pool.close()
    return not early_stop
Example #4
0
            pwd = sys.argv[2].encode('gbk')
        elif sys.argv[2] == "rm":
            if len(sys.argv) == 4:
                pwd = sys.argv[3].encode('gbk')
    return pwd


def rm_file(files):
    """Delete the source files when ``sys.argv[2]`` is ``"rm"``.

    Nothing is removed (and *files* is not iterated) when fewer than three
    command-line tokens are present or the third one is anything else.
    """
    delete_requested = len(sys.argv) > 2 and sys.argv[2] == "rm"
    if not delete_requested:
        return
    for source_path in files:
        os.remove(source_path)


if __name__ == '__main__':
    # Script entry point: extract every archive found under argv[1], then
    # keep extracting archives found in the extraction directories until
    # none are left.
    path = sys.argv[1]
    pwd = get_pwd()
    # with open(r'UnRAR.exe','rb') as f:
    #     with open(os.path.join(path,'UnRAR.exe'),'wb') as other:
    #         other.write(f.read())
    file_names = point_file_name(path)
    while file_names:
        pool = Pool()
        # error_callback reports a failed extraction instead of dropping the
        # exception silently.
        pool.starmap_async(un_rar, zip(file_names, [pwd] * len(file_names)),
                           error_callback=print)
        pool.close()
        pool.join()
        rm_file(file_names)

        # Rescan the output directory of EVERY archive from this round.
        # Re-assigning file_names inside the loop kept only the result for
        # the last archive, so nested archives of the others were skipped.
        nested = []
        scanned = set()
        for name in file_names:
            target = name.split(".")[0]
            if target in scanned:
                continue  # several archives can share one output directory
            scanned.add(target)
            nested.extend(point_file_name(target) or [])
        # Drop duplicates (order preserved) so no archive is handled twice.
        file_names = list(dict.fromkeys(nested))
Example #5
0
class ImageHandler:
    """Resize a set of images on a thread pool.

    Keyword arguments given to the constructor become instance attributes.
    The methods below read ``src_images`` (iterable of file or directory
    paths), ``workers`` (thread count for the pool), ``size`` (pattern handed
    to ``ImageSize.parse``) and ``quality`` (JPEG quality).  An optional
    ``on_image_processed(done, total)`` attribute is called after each image;
    without it the progress is printed.
    """
    dir_postfix = 'imgc'       # suffix of the output directory for directory inputs
    dir_files = '_files-imgc'  # output sub-directory for single-file inputs

    imgs_done = 0
    imgs_total = 0

    def __init__(self, queue=None, **kwargs):
        """Store *kwargs* as attributes and collect the image paths.

        Args:
            queue: unused; kept for backward compatibility.
            **kwargs: configuration attributes (see the class docstring).
        """
        import threading  # local import: only needed for the counter lock

        self.__dict__.update(kwargs)
        self.images = []  # list of (src, dst) tuples
        self.finished = False
        # resize_image runs on several pool threads; guards imgs_done.
        self._counter_lock = threading.Lock()
        self.generate_image_paths()

    def generate_image_paths(self):
        """Fill ``self.images`` with ``(src, dst)`` pairs for ``src_images``.

        A directory is walked recursively and mirrored into a sibling
        directory named ``<dir>-imgc``; a single file is mapped into a
        ``_files-imgc`` folder next to it.  Target directories are created.
        """
        for arg in self.src_images:

            # process directory path
            if os.path.isdir(arg):
                src = os.path.abspath(arg)
                dst = os.path.abspath("{}-{}".format(src, self.dir_postfix))
                os.makedirs(dst, exist_ok=True)  # target path must always exist

                for root, dirs, files in os.walk(src):
                    for f in files:
                        if extension(f) not in IMAGE_EXTS:
                            continue
                        src_file = os.path.join(root, f)
                        # Rebase through the relative path: str.replace would
                        # also rewrite later occurrences of src in the path.
                        dst_file = os.path.join(
                            dst, os.path.relpath(src_file, src))

                        parent_dir = os.path.dirname(dst_file)
                        print(parent_dir)
                        os.makedirs(parent_dir, exist_ok=True)
                        print(dst_file)
                        self.append((src_file, dst_file))

            # process file path
            elif os.path.isfile(arg):
                src = os.path.abspath(arg)
                dst_dir = os.path.join(os.path.dirname(src), self.dir_files)
                os.makedirs(dst_dir, exist_ok=True)
                dst = os.path.join(dst_dir, os.path.basename(arg))
                self.append((src, dst))
        self.imgs_total = len(self.images)

    # def stop(self):
    #     self._stop.set()

    # def stopped(self):
    #     return self._stop.isSet()

    def terminate_pool(self):
        """Stop the pool unless the run already finished; return ``finished``."""
        # run() may not have been called yet, in which case there is no pool.
        pool = getattr(self, 'pool', None)
        if not self.finished and pool is not None:
            pool.close()
            pool.terminate()
        return self.finished

    def on_finish(self, x):
        """Pool success callback: mark the run as finished without error."""
        print("{}: finished successfully!".format(self.__class__.__name__))
        self.finished = True
        self.error = False

    def on_error(self, x):
        """Pool error callback: report *x* and mark the run as failed."""
        # Re-raising here would kill the pool's result-handler thread and
        # leave ``finished`` False forever, so the failure is recorded.
        print("{}: error - {}".format(self.__class__.__name__, str(x)))
        self.finished = True
        self.error = True

    def run(self):
        """Start resizing all collected images asynchronously.

        Exits the interpreter via ``sys.exit()`` when no images were found.
        Completion is reported through ``on_finish`` / ``on_error``.
        """
        if not self.images:
            print("No images found!")
            sys.exit()

        print("Images found: {}".format(len(self.images)))

        self.pool = ThreadPool(self.workers)
        self.pool.starmap_async(
            self.resize_image, self.images, 1,
            self.on_finish, self.on_error)
        # Nothing else is submitted to this pool; close() lets the worker
        # threads exit once the queued images are done.
        self.pool.close()

    def append(self, path_tuple):
        """Add one ``(src, dst)`` tuple to the work list."""
        self.images.append(path_tuple)

    def print_status(self, dst):
        """Print the progress counter and the file name of *dst*."""
        print("[{}/{}] processed image {}".format(
            self.imgs_done, self.imgs_total, os.path.split(dst)[1]))

    # FIXME: add watermark feature
    # def watermark():
    #     try:
    #         watermark = Image.open(self.wmfile)
    #         # if extension(self.wmfile) in "png":
    #         #     watermark.load()
    #         wm_oldsize = watermark.size
    #         wm_newdim = min(wnew, hnew) * 0.2
    #         wm_oldindex, wm_olddim = min(enumerate(wm_oldsize))
    #         print(wm_oldindex, wm_olddim)
    #         wm_ratio = wm_newdim/wm_olddim
    #         wm_newsize = (int(wm_oldsize[0] * wm_ratio),
    #                       int(wm_oldsize[1] * wm_ratio))
    #         print(wm_newsize)
    #         watermark = watermark.resize(wm_newsize)
    #         # mask = watermark.convert("L").point(lambda x: min(x, 50))
    #         # .point(lambda x: 240)
    #         # mask.show()
    #         # watermark.putalpha(mask)
    #         im.paste(watermark, (0, 0), watermark)
    #     except AttributeError:
    #         pass

    def resize_image(self, src, dst):
        """Resize the image at *src* and save it to *dst*.

        The target size comes from ``ImageSize.parse(self.size, image=...)``.
        ``self.quality`` is applied only when *dst* has a JPEG extension.
        Afterwards ``on_image_processed(done, total)`` is called, or the
        status is printed when that call raises ``AttributeError``.
        """
        print("resizing")
        pattern = self.size
        quality = self.quality

        # The context manager closes the source file once the resized copy
        # has been produced.
        with Image.open(src) as original:
            new_size = ImageSize.parse(pattern, image=original)
            print('new_size:', new_size)
            im = original.resize(new_size, Image.BICUBIC)

        if os.path.splitext(dst)[1][1:].strip().lower() not in IMAGE_JPG:
            im.save(dst)
            print("saving non-jpeg %s" % dst)
        else:
            # quality supported by jpegs only
            im.save(dst, 'JPEG', quality=quality)
            print("saving jpeg %s" % dst)
        # "+= 1" is not atomic across the pool's threads.
        with self._counter_lock:
            self.imgs_done += 1
            done = self.imgs_done

        print("processed")
        try:
            self.on_image_processed(done, self.imgs_total)
            print("method called")
        except AttributeError:
            self.print_status(dst)
Example #6
0
class ThreadPool:
    """Wrapper around ``NamedPool`` whose error callback receives exc_info.

    Original note: multiprocessing.dummy.Pool with exc_info in
    error_callback.  Most methods forward directly to the wrapped pool; the
    ``u*`` variants skip duplicate items of the iterable first.
    """

    def __init__(self, name=None, processes=None):
        """Create the wrapped pool.

        Args:
            name: pool name; passed to ``NamedPool`` and handed to error
                callbacks as their first argument.
            processes: worker count passed to ``NamedPool``.
        """
        self._processes = processes
        self._pool = NamedPool(self._processes, name=name)
        self._lock = Lock()  # lock for self
        self._cblock = Lock()  # lock for callback
        self._errcblock = Lock()  # lock for error_callback
        self._closed = False

        self.name = name

    def apply(self, *args, **kwargs):
        """Forward to the wrapped pool's ``apply``."""
        return self._pool.apply(*args, **kwargs)

    def map(self, *args, **kwargs):
        """Forward to the wrapped pool's ``map``."""
        return self._pool.map(*args, **kwargs)

    def map_async(self, *args, **kwargs):
        """Forward to the wrapped pool's ``map_async``."""
        return self._pool.map_async(*args, **kwargs)

    def imap(self, *args, **kwargs):
        """Forward to the wrapped pool's ``imap``."""
        return self._pool.imap(*args, **kwargs)

    def imap_unordered(self, *args, **kwargs):
        """Forward to the wrapped pool's ``imap_unordered``."""
        return self._pool.imap_unordered(*args, **kwargs)

    def starmap(self, *args, **kwargs):
        """Forward to the wrapped pool's ``starmap``."""
        return self._pool.starmap(*args, **kwargs)

    def starmap_async(self, *args, **kwargs):
        """Forward to the wrapped pool's ``starmap_async``."""
        return self._pool.starmap_async(*args, **kwargs)

    def join(self):
        """Forward to the wrapped pool's ``join``."""
        return self._pool.join()

    def _uiter(self, iterable):
        """Yield the items of *iterable*, skipping ones equal to an earlier item."""
        # A list rather than a set so unhashable items can be compared too.
        seen = []
        for item in iterable:
            if item in seen:
                continue
            seen.append(item)
            yield item

    def _trycall(self, func, args=(), kwargs=None, lock=None):
        """Call ``func(*args, **kwargs)`` under *lock*, swallowing its errors.

        Returns the call's result, or None when *func* is not callable or
        raised an exception.
        """
        if not callable(func):
            return None
        call_kwargs = {} if kwargs is None else kwargs
        try:
            if lock is None:
                return func(*args, **call_kwargs)
            with lock:
                return func(*args, **call_kwargs)
        except Exception:
            # Callbacks are best effort: a failing callback must not break
            # the task that triggered it.
            return None

    def _caller(self, func, args, kwargs, callback, error_callback, exc_raise):
        """Run *func* and dispatch the outcome to the matching callback.

        On success ``callback(result)`` is called and the result returned.
        On failure ``error_callback(name, etype, value, tb)`` is called and,
        when *exc_raise* is true, the original exception is re-raised.
        """
        try:
            result = func(*args, **kwargs)
        except BaseException:
            etype, value, tb = sys.exc_info()
            self._trycall(error_callback, args=(self.name, etype, value, tb),
                          lock=self._errcblock)
            if exc_raise:
                # Bare raise keeps the original exception and traceback;
                # etype(value) fails for exception types whose constructor
                # needs several arguments (e.g. UnicodeDecodeError).
                raise
        else:
            self._trycall(callback, args=(result,),
                          lock=self._cblock)
            return result

    def apply_async(self, func, args=(), kwargs=None,
                    callback=None, error_callback=None):
        """Like ``Pool.apply_async`` but with a richer error callback.

        ``error_callback`` is called with ``(ThreadPool.name, etype, value,
        tb)`` when *func* fails.  Neither callback runs concurrently with
        itself.  Other arguments are the same as ``Pool.apply_async``.
        """
        call_kwargs = {} if kwargs is None else kwargs
        return self._pool.apply_async(
            self._caller, (func, args, call_kwargs, None, error_callback, True),
            callback=callback)

    def cbmap(self, func, iterable, callback=None, error_callback=None):
        """Call ``apply_async(func, (item,))`` for each item; returns None."""
        for item in iterable:
            self.apply_async(func, args=(item,),
                             callback=callback, error_callback=error_callback)

    def ucbmap(self, func, iterable, callback=None, error_callback=None):
        """Unique version of ``ThreadPool.cbmap``."""
        return self.cbmap(func, self._uiter(iterable), callback, error_callback)

    def umap(self, func, iterable, chunksize=None):
        """Unique version of ``ThreadPool.map``."""
        return self.map(func, self._uiter(iterable), chunksize=chunksize)

    def umap_async(self, func, iterable, chunksize=None,
                   callback=None, error_callback=None):
        """Unique version of ``ThreadPool.map_async``."""
        return self.map_async(
            func, self._uiter(iterable), chunksize,
            callback, error_callback)

    def uimap(self, func, iterable, chunksize=None):
        """Unique version of ``ThreadPool.imap``."""
        return self.imap(func, self._uiter(iterable), chunksize)

    def uimap_unordered(self, func, iterable, chunksize=None):
        """Unique version of ``ThreadPool.imap_unordered``."""
        return self.imap_unordered(func, self._uiter(iterable), chunksize)

    def ustarmap(self, func, iterable, chunksize=None):
        """Unique version of ``ThreadPool.starmap``."""
        return self.starmap(func, self._uiter(iterable), chunksize)

    def ustarmap_async(self, func, iterable, chunksize=None,
                       callback=None, error_callback=None):
        """Unique version of ``ThreadPool.starmap_async``."""
        return self.starmap_async(
            func, self._uiter(iterable), chunksize,
            callback, error_callback)

    def close(self):
        """Same as ``Pool.close``; also marks this wrapper as closed."""
        self._closed = True
        return self._pool.close()

    def terminate(self):
        """Same as ``Pool.terminate``; also marks this wrapper as closed."""
        self._closed = True
        return self._pool.terminate()

    def renew(self):
        """Terminate the current pool and start a new clean one."""
        with self._lock:
            self.terminate()
            # Build the same kind of pool as __init__ so the name is kept
            # (previously a plain ``Pool`` was created here).
            self._pool = NamedPool(self._processes, name=self.name)
            self._closed = False

    @property
    def closed(self):
        """True if this ThreadPool was closed or terminated."""
        return self._closed

    def __enter__(self):
        return self

    def __exit__(self, etype, value, tb):
        self.terminate()
Example #7
0
                timeNow = int(str(item['time'])[0:-3])
                timeLocal = time.localtime(timeNow)
                comments['时间'] = time.strftime("%Y-%m-%d %H:%M:%S", timeLocal)
                write_to_csv(comments)
                print(item['content'])
        except Exception as error:
            print(error)
    else:
        print(req.status_code)


if __name__ == '__main__':
    # Script entry point: fetch all comment pages of the song whose id is
    # given as the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <song_id>".format(sys.argv[0]))
    _id = sys.argv[1]
    create_csv()
    pool = Pool(20)
    # NOTE(review): nothing in this block updates count, so the print below
    # shows 0 unless fetch changes it in this process -- confirm.
    count = 0
    url = "http://music.163.com/weapi/v1/resource/comments/R_SO_4_{}/?csrf_token="
    musicURL = url.format(_id)
    total = fetchNum(musicURL, _id)
    # One task per page of 100 comments: offsets 0, 100, ... up to the last
    # multiple of 100 that does not exceed total.
    page_count = int(total / 100) + 1
    args = [(musicURL, _id, str(page * 100)) for page in range(page_count)]
    # error_callback reports a failed worker instead of dropping the
    # exception silently.
    pool.starmap_async(fetch, args, error_callback=print)
    pool.close()
    pool.join()
    print(count)

end = time.time()
print(end - start)
Example #8
0
    def extractAllAndCompare(self):
        """Scrape all news sources, then launch the comparison view.

        Unless today's ``allValueFiles.txt`` already exists, every source in
        ``names`` is processed by ``extractorRunner.runScrapper`` on a pool
        of 4 threads while the progress label is updated; the file names
        received through the queue are then written to that file.
        Afterwards ``ui.comparingAll`` is started as a subprocess, the method
        waits for today's best-source file to appear, writes
        ``./data/done.txt`` and waits for the subprocess to exit.
        """
        self.customScoreLabel.hide()
        names = [
            "times of india", "the hindu", "guardian", "new york times",
            "google news", "cnn", "reddit news", "reddit world news",
            "telegraph", "bbc"
        ]
        # Derived from the list so the progress text and the queue reads stay
        # correct when a source is added or removed.
        total = len(names)

        today = str(datetime.date.today())
        directory = "./data/allFiles/" + today
        os.makedirs(directory, exist_ok=True)
        storageFile = directory + "/allValueFiles.txt"

        if not os.path.exists(storageFile):
            # Passing it since argument is required, nothing to sync
            e = multiprocessing.Event()
            # To get score file from threaded process
            queue = multiprocessing.Queue()

            pool = ThreadPool(4)
            results = pool.starmap_async(extractorRunner.runScrapper,
                                         zip(names, repeat(e), repeat(queue)),
                                         chunksize=1)
            while not results.ready():
                # _number_left is a private attribute of the async result;
                # with chunksize=1 it counts the sources still outstanding.
                self.extractingAllLabel.setText(
                    _fromUtf8(
                        "<html><head/><body><p align=\"center\"><span style=\" font-size:14pt;font-family:'Lucida Calligraphy';\
                    font-weight:600; color:black;\">\
                     Extracting And Analyzing All Sources: " +
                        str(total - results._number_left) + "/" +
                        str(total) + "<u></u></span></p></body></html>"))
                self.extractingAllLabel.show()
                QApplication.processEvents()
                # Sleep briefly between UI refreshes instead of spinning at
                # full CPU.
                results.wait(0.05)

            pool.close()
            pool.join()  # Wait for all threads to return

            # NOTE(review): this blocks forever if a scraper did not put its
            # file name on the queue (e.g. because it raised) -- confirm.
            outputfiles = "".join(" " + queue.get() for _ in range(total))

            with open(storageFile, "w") as temp:
                temp.write(outputfiles)

        self.extractingAllLabel.hide()
        QApplication.processEvents()
        # Show comparison graph.  An argument list works on every platform;
        # a single command string without shell=True only works on Windows.
        outputProcess = subprocess.Popen(
            ["python", "-m", "ui.comparingAll", storageFile])
        # Check if best source file exists
        today = str(datetime.date.today())
        directory = "./data/BestSource/" + today
        os.makedirs(directory, exist_ok=True)
        best_source_file = directory + "/source.txt"
        while not os.path.exists(best_source_file):
            time.sleep(0.5)

        with open("./data/done.txt", "w") as file:  # For server service
            file.write("Done")

        outputProcess.wait()
        QApplication.processEvents()
Example #9
0
        self.path = file_name.split('.')[0]
        self.lock = RLock()
        self.pwd = pwd
        self.zip = zipfile.ZipFile(self.file_name)
        self.zip.setpassword(pwd=self.pwd)
        self.files = self.zip.namelist()

    # def get_file(self):
    #     for self.file in self.files:
    #         yield self.file

    def un_zip(self, file):
        """Extract the single archive member *file* into ``self.path``."""
        # with self.lock:
        destination = self.path
        self.zip.extract(file, destination)

    def extr_all(self):
        """Extract every archive member into ``self.path`` using ``self.pwd``."""
        destination, password = self.path, self.pwd
        self.zip.extractall(path=destination, pwd=password)


if __name__ == "__main__":
    # Script entry point: extract every member of one password-protected
    # archive, one pool task per member.
    file_name = r"/Users/lihailong/Desktop/未命名文件夹/用户画像.zip"
    orpwd = "cnd2018知识库"
    pwd = orpwd.encode("gbk")
    unzip = Zip(file_name, pwd)
    po = Pool(10)
    files = unzip.files
    # unzip.extr_all()
    # zip(files) wraps each member name in a 1-tuple, as starmap expects.
    # Without an error_callback a failed task (bad password, or a task that
    # cannot be sent to a worker) would go completely unnoticed.
    po.starmap_async(
        unzip.un_zip, zip(files),
        error_callback=lambda err: print("unzip failed: " + repr(err)))
    po.close()
    po.join()
Example #10
0
class ImageHandler:
    """Resize a set of images on a thread pool.

    Keyword arguments given to the constructor become instance attributes.
    The methods below read ``src_images`` (iterable of file or directory
    paths), ``workers`` (thread count for the pool), ``size`` (pattern handed
    to ``ImageSize.parse``) and ``quality`` (JPEG quality).  An optional
    ``on_image_processed(done, total)`` attribute is called after each image;
    without it the progress is printed.
    """
    dir_postfix = 'imgc'       # suffix of the output directory for directory inputs
    dir_files = '_files-imgc'  # output sub-directory for single-file inputs

    imgs_done = 0
    imgs_total = 0

    def __init__(self, queue=None, **kwargs):
        """Store *kwargs* as attributes and collect the image paths.

        Args:
            queue: unused; kept for backward compatibility.
            **kwargs: configuration attributes (see the class docstring).
        """
        import threading  # local import: only needed for the counter lock

        self.__dict__.update(kwargs)
        self.images = []  # list of (src, dst) tuples
        self.finished = False
        # resize_image runs on several pool threads; guards imgs_done.
        self._counter_lock = threading.Lock()
        self.generate_image_paths()

    def generate_image_paths(self):
        """Fill ``self.images`` with ``(src, dst)`` pairs for ``src_images``.

        A directory is walked recursively and mirrored into a sibling
        directory named ``<dir>-imgc``; a single file is mapped into a
        ``_files-imgc`` folder next to it.  Target directories are created.
        """
        for arg in self.src_images:

            # process directory path
            if os.path.isdir(arg):
                src = os.path.abspath(arg)
                dst = os.path.abspath("{}-{}".format(src, self.dir_postfix))
                os.makedirs(dst, exist_ok=True)  # target path must always exist

                for root, dirs, files in os.walk(src):
                    for f in files:
                        if extension(f) not in IMAGE_EXTS:
                            continue
                        src_file = os.path.join(root, f)
                        # Rebase through the relative path: str.replace would
                        # also rewrite later occurrences of src in the path.
                        dst_file = os.path.join(
                            dst, os.path.relpath(src_file, src))

                        parent_dir = os.path.dirname(dst_file)
                        print(parent_dir)
                        os.makedirs(parent_dir, exist_ok=True)
                        print(dst_file)
                        self.append((src_file, dst_file))

            # process file path
            elif os.path.isfile(arg):
                src = os.path.abspath(arg)
                dst_dir = os.path.join(os.path.dirname(src), self.dir_files)
                os.makedirs(dst_dir, exist_ok=True)
                dst = os.path.join(dst_dir, os.path.basename(arg))
                self.append((src, dst))
        self.imgs_total = len(self.images)

    # def stop(self):
    #     self._stop.set()

    # def stopped(self):
    #     return self._stop.isSet()

    def terminate_pool(self):
        """Stop the pool unless the run already finished; return ``finished``."""
        # run() may not have been called yet, in which case there is no pool.
        pool = getattr(self, 'pool', None)
        if not self.finished and pool is not None:
            pool.close()
            pool.terminate()
        return self.finished

    def on_finish(self, x):
        """Pool success callback: mark the run as finished without error."""
        print("{}: finished successfully!".format(self.__class__.__name__))
        self.finished = True
        self.error = False

    def on_error(self, x):
        """Pool error callback: report *x* and mark the run as failed."""
        # Re-raising here would kill the pool's result-handler thread and
        # leave ``finished`` False forever, so the failure is recorded.
        print("{}: error - {}".format(self.__class__.__name__, str(x)))
        self.finished = True
        self.error = True

    def run(self):
        """Start resizing all collected images asynchronously.

        Exits the interpreter via ``sys.exit()`` when no images were found.
        Completion is reported through ``on_finish`` / ``on_error``.
        """
        if not self.images:
            print("No images found!")
            sys.exit()

        print("Images found: {}".format(len(self.images)))

        self.pool = ThreadPool(self.workers)
        self.pool.starmap_async(self.resize_image, self.images, 1,
                                self.on_finish, self.on_error)
        # Nothing else is submitted to this pool; close() lets the worker
        # threads exit once the queued images are done.
        self.pool.close()

    def append(self, path_tuple):
        """Add one ``(src, dst)`` tuple to the work list."""
        self.images.append(path_tuple)

    def print_status(self, dst):
        """Print the progress counter and the file name of *dst*."""
        print("[{}/{}] processed image {}".format(self.imgs_done,
                                                  self.imgs_total,
                                                  os.path.split(dst)[1]))

    # FIXME: add watermark feature
    # def watermark():
    #     try:
    #         watermark = Image.open(self.wmfile)
    #         # if extension(self.wmfile) in "png":
    #         #     watermark.load()
    #         wm_oldsize = watermark.size
    #         wm_newdim = min(wnew, hnew) * 0.2
    #         wm_oldindex, wm_olddim = min(enumerate(wm_oldsize))
    #         print(wm_oldindex, wm_olddim)
    #         wm_ratio = wm_newdim/wm_olddim
    #         wm_newsize = (int(wm_oldsize[0] * wm_ratio),
    #                       int(wm_oldsize[1] * wm_ratio))
    #         print(wm_newsize)
    #         watermark = watermark.resize(wm_newsize)
    #         # mask = watermark.convert("L").point(lambda x: min(x, 50))
    #         # .point(lambda x: 240)
    #         # mask.show()
    #         # watermark.putalpha(mask)
    #         im.paste(watermark, (0, 0), watermark)
    #     except AttributeError:
    #         pass

    def resize_image(self, src, dst):
        """Resize the image at *src* and save it to *dst*.

        The target size comes from ``ImageSize.parse(self.size, image=...)``.
        ``self.quality`` is applied only when *dst* has a JPEG extension.
        An ``OSError`` (e.g. a corrupt file) is printed and the image is not
        counted.  Afterwards ``on_image_processed(done, total)`` is called,
        or the status is printed when that call raises ``AttributeError``.
        """
        pattern = self.size
        quality = self.quality

        try:
            # The context manager closes the source file once the resized
            # copy has been produced.
            with Image.open(src) as original:
                new_size = ImageSize.parse(pattern, image=original)
                im = original.resize(new_size, Image.BICUBIC)

            if os.path.splitext(dst)[1][1:].strip().lower() not in IMAGE_JPG:
                im.save(dst)
            else:
                # quality supported by jpegs only
                im.save(dst, 'JPEG', quality=quality)

            print("saved: %s" % dst)
            # "+= 1" is not atomic across the pool's threads.
            with self._counter_lock:
                self.imgs_done += 1
        except OSError as err:  # e.g. file is corrupt and cannot be open
            print('ERROR')
            print(err)

        try:
            self.on_image_processed(self.imgs_done, self.imgs_total)
            print("method called")
        except AttributeError:
            self.print_status(dst)