def run(self):
    """Count words in ``self.f1`` and emit the result.

    Dispatches on ``self.workers``:
      * 0  -> count in-process without chunking (``count_direct``),
      * 1  -> count in-process over the whole file (``count_single``),
      * >1 -> split the file into byte ranges and count each range in a
        worker process, then merge the partial counters.

    Writes the formatted result to ``self.f2`` (if set) or stdout, and
    records the elapsed wall time in ``self.cost``.
    """
    start = time.time()
    if self.workers == 0:
        self.count_direct(self.f1)
    elif self.workers == 1:
        self.count_single(self.f1, self.filesize)
    else:
        pool = Pool(self.workers)
        res_list = []
        for i in range(self.workers):
            # Multiply before dividing so the i-th boundary rounds such
            # that the union of [p1, p2) ranges covers the whole file.
            p1 = self.filesize * i // self.workers
            p2 = self.filesize * (i+1) // self.workers
            # ``wrap`` is a module-level trampoline (presumably needed
            # because bound methods don't pickle on all platforms —
            # TODO confirm against its definition).
            args = [self, self.f1, p1, p2, self.filesize]
            res = pool.apply_async(func=wrap, args=args)
            res_list.append(res)
        pool.close()
        pool.join()
        # Merge the per-chunk counters into this instance's counter.
        self._c.update(reduce(operator.add, [r.get() for r in res_list]))
    if self.f2:
        with open(self.f2, 'wb') as f:
            f.write(self.result.encode(self.coding))
    else:
        print(self.result)
    cost = '{:.1f}'.format(time.time()-start)
    size = humansize(self.filesize)
    tip = '\nFile size: {}. Workers: {}. Cost time: {} seconds'
    print(tip.format(size, self.workers, cost))
    self.cost = cost + 's'
def __init__(self, name, magnet=None, size=None, uploader=None, date=None,
             seed_count=None, leech_count=None, url=None):
    """Hold the metadata of a single torrent search result.

    All fields are stored as given; ``human_size`` is additionally
    derived when ``size`` is a non-empty integer string.
    """
    self.name = name
    self.url = url
    self.magnet = magnet
    self.date = date
    self.uploader = uploader
    self.size = size
    self.seed_count = seed_count
    self.leech_count = leech_count
    # Only attach a human-readable size when the raw value parses as an
    # integer byte count; otherwise the attribute is left unset.
    if size != '' and representsInteger(size):
        self.human_size = humansize(int(size))
def main():
    """Benchmark WordCounter over generated files with varying worker counts.

    Input/output file names come from ``sys.argv[1:3]`` when given;
    otherwise the bundled '100lines.txt' sample is replicated into big
    test files under a 'var' directory. Results are printed and appended
    to 'test_result.txt'.
    """
    if len(sys.argv) > 2:
        from_file, to_file = sys.argv[1:3]
    else:
        # BUG FIX: previously these defaults were assigned
        # unconditionally, clobbering the command-line arguments.
        from_file, to_file = '100lines.txt', 'count_result.txt'
    # 在上一级目录的var文件夹中,生成测试用大文件
    # (generate big test files in the 'var' folder of the parent dir
    # when running from inside the 'test' directory)
    if os.path.dirname(__file__) in ['test']:
        dir_of_bigfile = os.path.join('..', 'var')
    else:
        dir_of_bigfile = 'var'
    if not os.path.exists(dir_of_bigfile):
        os.mkdir(dir_of_bigfile)
    with open(from_file, 'rb') as f:
        s = f.read()
    files = []
    # Each multiplier replicates the 100-line sample; file name records
    # the resulting line count in thousands.
    for i in [2000, 10000, 20000, 100000, 200000]:
        fn = '{}thousandlines.txt'.format(i//10)
        ffn = os.path.join(dir_of_bigfile, fn)
        files.append(ffn)
        if not os.path.exists(ffn):
            with open(ffn, 'wb') as f:
                f.write(s*i)
    ps = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]  # 待测试的进程数 (worker counts to test)
    pre = '{:8}' * (len(ps) + 1)
    title = ['size'] + ['{}ps'.format(i) for i in ps]
    L = [pre.format(*title)]
    for i in files:
        size = os.path.getsize(i)
        ws = [WordCounter(i, to_file, p) for p in ps]
        [w.run() for w in ws]
        title = [humansize(size)] + [w.cost for w in ws]
        L.append(pre.format(*title))
        print('-'*40)
    t = 'cpu_count = {}, now = {}'.format(cpu_count(), datetime.now())
    result = '\n'.join([sys.version, t] + L + ['-'*75, ''])
    print(result)
    # Append ('ab') so successive benchmark runs accumulate.
    with open('test_result.txt', 'ab') as f:
        f.write(result.encode('utf-8'))
def parse_xml_for_torrents(self, raw_xml):
    """
    Parse a torznab RSS/XML response into Torrent objects.

    :param jackett.Jackett self: object instance
    :param bytes raw_xml: the xml page returned by querying jackett
    :return: all the torrents we found in the xml page
    :rtype: list
    """
    tree = ET.fromstring(raw_xml)
    channel = tree.find('channel')
    results = []
    for child in channel.findall('item'):
        title = self.find_xml_attribute(child, 'title')
        date = self.find_xml_attribute(child, 'pubDate')
        magnet = self.find_xml_attribute(child, 'link')
        size = self.find_xml_attribute(child, 'size')
        # NOTE(review): ``files`` is extracted but never used below.
        files = self.find_xml_attribute(child, 'files')
        # Heuristic: uploader is presumably the "-Name" suffix of the
        # title; the regex keeps the last "-word" match and strips the
        # leading dash. TODO confirm this matches the tracker's naming.
        foundUploader = re.findall('-1? *\w*', title)
        if len(foundUploader) > 0:
            uploader = str(foundUploader[-1][1:])
        else:
            uploader = None
        seeders = 0
        peers = 0
        # Seeder/peer counts live in namespaced torznab <attr> elements.
        for elm in child.findall('{http://torznab.com/schemas/2015/feed}attr'):
            if elm.get('name') == 'seeders':
                seeders = elm.get('value')
            if elm.get('name') == 'peers':
                peers = elm.get('value')
        # Rebind ``size`` to its human-readable form. NOTE(review): the
        # humanized string is what gets passed to Torrent below, so
        # Torrent's own integer-size handling will not re-trigger.
        if size != '' and representsInteger(size):
            size = humansize(int(size))
        logger.debug('Found torrent with info: \n\ttitle: {}\n\tmagnet: {}\n\tsize: {}\n\tdate: {}\
\n\tseeders: {}\n\tpeers: {}'.format(title, magnet, size, date, seeders, peers))
        torrent = Torrent(title, magnet=magnet, size=size, uploader=uploader,
                          date=date, seed_count=seeders, leech_count=peers)
        results.append(torrent)
    return results
def main():
    """Benchmark WordCounter over generated files with varying worker counts.

    Input/output file names come from ``sys.argv[1:3]`` when given;
    otherwise the bundled '100lines.txt' sample is replicated into big
    test files under a 'var' directory. Results are printed and appended
    to 'test_result.txt'.
    """
    if len(sys.argv) > 2:
        from_file, to_file = sys.argv[1:3]
    else:
        # BUG FIX: previously these defaults were assigned
        # unconditionally, clobbering the command-line arguments.
        from_file, to_file = "100lines.txt", "count_result.txt"
    # 在上一级目录的var文件夹中,生成测试用大文件
    # (generate big test files in the 'var' folder of the parent dir
    # when running from inside the 'test' directory)
    if os.path.dirname(__file__) in ["test"]:
        dir_of_bigfile = os.path.join("..", "var")
    else:
        dir_of_bigfile = "var"
    if not os.path.exists(dir_of_bigfile):
        os.mkdir(dir_of_bigfile)
    with open(from_file, "rb") as fp:
        s = fp.read()
    files = []
    # Each multiplier replicates the 100-line sample; file name records
    # the resulting line count in thousands.
    for i in [2000, 10000, 20000, 100000, 200000]:
        fn = "{}thousandlines.txt".format(i // 10)
        ffn = os.path.join(dir_of_bigfile, fn)
        files.append(ffn)
        if not os.path.exists(ffn):
            with open(ffn, "wb") as fp:
                fp.write(s * i)
    ps = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]  # 待测试的进程数 (worker counts to test)
    pre = "{:8}" * (len(ps) + 1)
    title = ["size"] + ["{}ps".format(i) for i in ps]
    L = [pre.format(*title)]
    for i in files:
        size = os.path.getsize(i)
        ws = [WordCounter(i, to_file, p) for p in ps]
        [w.run() for w in ws]
        title = [humansize(size)] + [w.cost for w in ws]
        L.append(pre.format(*title))
        print("-" * 40)
    t = "cpu_count = {}, now = {}".format(cpu_count(), datetime.now())
    result = "\n".join([sys.version, t] + L + ["-" * 75, ""])
    print(result)
    # Append ("ab") so successive benchmark runs accumulate.
    with open("test_result.txt", "ab") as fp:
        fp.write(result.encode("utf-8"))
def countwords(from_file, to_file, workers=1):
    """Count words in ``from_file`` and write the result to ``to_file``.

    With ``workers == 1`` the file is counted in-process; otherwise it
    is split into byte ranges counted by one thread each, whose partial
    counters are collected from the shared queue ``q`` and merged.

    :param from_file: path of the file to count.
    :param to_file: path the formatted result is written to.
    :param workers: number of counting threads (default 1).
    :return: elapsed wall time as a string like '1.2s'.
    """
    start = time.time()
    f_size = os.path.getsize(from_file)
    if workers == 1:
        c = counter_single(from_file, f_size)
    else:
        res_list, threads = [], []
        for i in range(workers):
            # BUG FIX: multiply before dividing. The old form
            # ``f_size // workers * i`` made the last boundary
            # ``f_size // workers * workers``, which is < f_size when
            # f_size isn't a multiple of workers, silently dropping the
            # file's tail bytes.
            p1 = f_size * i // workers
            p2 = f_size * (i + 1) // workers
            args = [from_file, p1, p2, f_size]
            t = Thread(target=word_count, args=args)
            t.start()
            threads.append(t)
        [t.join() for t in threads]
        # Each worker thread pushed its partial counter onto ``q``.
        while not q.empty():
            res_list.append(q.get())
        c = reduce(operator.add, res_list)
    write_result(c, to_file)
    cost = '{:.1f}'.format(time.time() - start)
    size = humansize(f_size)
    tip = '\n{}File size: {}. Workers: {}. Cost time: {} seconds'
    # 显示光标: '\33[?25h' (ANSI escape: re-show the terminal cursor)
    print(tip.format('\33[?25h', size, workers, cost))
    return cost + 's'
def get_status(self, dld):
    """Build the progress/status dict for one download record ``dld``.

    Side effects: may advance ``dld.status`` (starting -> downloading,
    downloading -> completed), and persists a rolling window of
    (elapsed, size) samples in ``dld.progress`` (JSON) used for the
    speed/ETA calculation.

    :param dld: a download model instance (presumably a Django/peewee
        row — TODO confirm; ``model_to_dict`` and ``.save()`` are used).
    :return: dict with keys id, info, status, progress, speed,
        time_left, eta (and 'error' on failure).
    """
    # default
    status = {
        'id': dld.id,
        'info': model_to_dict(dld),
        'status': 'created',
        'progress': 0,
        'speed': '',
        'time_left': '',
        'eta': '',
    }
    # the full path to the file
    fullpath = self.__get_fullpath(dld)
    # Whole seconds since the download started (naive datetimes).
    elapsed = datetime.datetime.now() - dld.started_at
    elapsed = elapsed.days * 86400 + elapsed.seconds
    # first leave starting status
    if dld.status == consts.STATUS_STARTING:
        if os.path.exists(fullpath):
            dld.status = consts.STATUS_DOWNLOADING
            dld.save()
        else:
            # No file after 5 seconds is treated as a startup failure.
            if elapsed > 5:
                status['status'] = 'error'
            else:
                status['status'] = 'starting'
    # then downloading is the more and may update the status
    if dld.status == consts.STATUS_DOWNLOADING:
        # first check file
        if os.path.exists(fullpath):
            statinfo = os.stat(fullpath)
            currsize = statinfo.st_size
            if currsize == dld.filesize:
                dld.status = consts.STATUS_COMPLETED
                dld.save()
            else:
                if dld.filesize > 0:
                    status['progress'] = '{0:2.1f}'.format(
                        currsize / dld.filesize * 100)
                # now check processs
                try:
                    p = psutil.Process(dld.pid)
                    if p.status() in (psutil.STATUS_STOPPED,
                                      psutil.STATUS_ZOMBIE,
                                      psutil.STATUS_DEAD):
                        raise RuntimeError('Invalid process status: ' +
                                           p.status())
                    # download in progress
                    status['status'] = 'downloading'
                    # calculate download speed
                    if elapsed > 0:
                        # get previous statuses
                        statuses = []
                        if not dld.progress is None:
                            statuses = json.loads(dld.progress)
                        # remove old statuses and add latest; keeps only
                        # samples strictly older than now but within
                        # SPEED_CALC_TIME_RANGE seconds.
                        statuses = list(
                            filter(
                                lambda x: elapsed - x['elapsed'] > 0 and
                                elapsed - x['elapsed'] <= consts.
                                SPEED_CALC_TIME_RANGE, statuses))
                        statuses.append({
                            'elapsed': elapsed,
                            'size': currsize,
                        })
                        # now take oldest one and calc speed
                        oldest = statuses[0]
                        if elapsed != oldest['elapsed']:
                            # calculate speed (bytes per second over the
                            # sample window)
                            speed = (currsize - oldest['size']) / (
                                elapsed - oldest['elapsed'])
                            status['speed'] = utils.humansize(speed) + '/s'
                            # calculate left
                            if dld.filesize > 0 and speed > 0:
                                left_bytes = dld.filesize - currsize
                                left_seconds = left_bytes / speed
                                delta = datetime.timedelta(0, left_seconds)
                                # Drop fractional seconds for display.
                                status['time_left'] = str(delta).split(
                                    '.')[0]
                                status[
                                    'eta'] = '{0:%Y-%m-%d %H:%M:%S}'.format(
                                        datetime.datetime.now() + delta)
                        # save statuses
                        dld.progress = json.dumps(statuses)
                        dld.save()
                except Exception as ex:
                    print('Reporting error during download',
                          dld.filename, ex)
                    status['status'] = 'error'
                    status['error'] = repr(ex)
        else:
            print('File does not exist during download', dld.filename)
            status['status'] = 'error'
    # others are simple
    if dld.status == consts.STATUS_ERROR:
        status['status'] = 'error'
    if dld.status == consts.STATUS_COMPLETED:
        status['status'] = 'completed'
        status['progress'] = 100
    if dld.status == consts.STATUS_PROCESSED:
        status['status'] = 'processed'
        status['progress'] = 100
    if dld.status == consts.STATUS_CANCELLED:
        status['status'] = 'cancelled'
        status['progress'] = 0
    # done
    return status