Esempio n. 1
0
 def run(self):
     """Count words, write/print the result, and record elapsed time.

     Dispatches on self.workers: 0 -> direct count, 1 -> single pass,
     N>1 -> split the file into N byte ranges handled by a process pool.
     """
     t0 = time.time()
     if self.workers == 0:
         self.count_direct(self.f1)
     elif self.workers == 1:
         self.count_single(self.f1, self.filesize)
     else:
         pool = Pool(self.workers)
         # One async task per worker; multiply before dividing so the
         # byte ranges tile the whole file exactly.
         async_results = [
             pool.apply_async(
                 func=wrap,
                 args=[self, self.f1,
                       self.filesize * i // self.workers,
                       self.filesize * (i + 1) // self.workers,
                       self.filesize])
             for i in range(self.workers)
         ]
         pool.close()
         pool.join()
         # Fold the per-chunk counters into the shared counter.
         self._c.update(reduce(operator.add,
                               (r.get() for r in async_results)))
     if self.f2:
         with open(self.f2, 'wb') as f:
             f.write(self.result.encode(self.coding))
     else:
         print(self.result)
     cost = '{:.1f}'.format(time.time() - t0)
     size = humansize(self.filesize)
     tip = '\nFile size: {}. Workers: {}. Cost time: {} seconds'
     print(tip.format(size, self.workers, cost))
     self.cost = cost + 's'
Esempio n. 2
0
    def __init__(self,
                 name,
                 magnet=None,
                 size=None,
                 uploader=None,
                 date=None,
                 seed_count=None,
                 leech_count=None,
                 url=None):
        """Store torrent metadata.

        :param name: torrent title
        :param magnet: magnet link, if known
        :param size: raw size in bytes (string or int), or None/'' if unknown
        :param uploader: uploader name, if known
        :param date: publication date, if known
        :param seed_count: number of seeders, if known
        :param leech_count: number of leechers, if known
        :param url: torrent page URL, if known
        """
        self.name = name
        self.magnet = magnet
        self.size = size
        self.uploader = uploader
        self.date = date
        self.seed_count = seed_count
        self.leech_count = leech_count
        self.url = url

        # Always define human_size so readers never hit AttributeError
        # (originally it was only set when size was a non-empty integer),
        # and skip the default size=None, which used to be passed straight
        # into representsInteger().
        self.human_size = None
        if size not in (None, '') and representsInteger(size):
            self.human_size = humansize(int(size))
Esempio n. 3
0
def main():
    """Generate big test files under var/ and benchmark WordCounter.

    Appends a timing table (one row per file size, one column per worker
    count) to test_result.txt.
    """
    # Honor CLI arguments when provided; otherwise fall back to the sample
    # file. (The original parsed sys.argv and then unconditionally
    # overwrote the result, making the CLI arguments dead code.)
    if len(sys.argv) > 2:
        from_file, to_file = sys.argv[1:3]
    else:
        from_file, to_file = '100lines.txt', 'count_result.txt'
    # Generate the big test files in the parent directory's var folder
    # when running from the test directory.
    if os.path.dirname(__file__) in ['test']:
        dir_of_bigfile = os.path.join('..', 'var')
    else:
        dir_of_bigfile = 'var'
    if not os.path.exists(dir_of_bigfile):
        os.mkdir(dir_of_bigfile)

    with open(from_file, 'rb') as f:
        s = f.read()
    files = []
    for i in [2000, 10000, 20000, 100000, 200000]:
        fn = '{}thousandlines.txt'.format(i // 10)
        ffn = os.path.join(dir_of_bigfile, fn)
        files.append(ffn)
        if not os.path.exists(ffn):
            with open(ffn, 'wb') as f:
                f.write(s * i)

    ps = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]  # worker counts to test
    pre = '{:8}' * (len(ps) + 1)
    title = ['size'] + ['{}ps'.format(i) for i in ps]
    L = [pre.format(*title)]
    for i in files:
        size = os.path.getsize(i)
        ws = [WordCounter(i, to_file, p) for p in ps]
        for w in ws:  # run() for its side effects; it sets w.cost
            w.run()
        title = [humansize(size)] + [w.cost for w in ws]
        L.append(pre.format(*title))
        print('-' * 40)
    t = 'cpu_count = {}, now = {}'.format(cpu_count(), datetime.now())
    result = '\n'.join([sys.version, t] + L + ['-' * 75, ''])
    print(result)
    with open('test_result.txt', 'ab') as f:
        f.write(result.encode('utf-8'))
Esempio n. 4
0
   def parse_xml_for_torrents(self, raw_xml):
      """
      Parses a Torznab XML result page into Torrent objects
      :param jackett.Jackett self: object instance
      :param bytes raw_xml: the xml page returned by querying jackett
      :return: all the torrents we found in the xml page
      :rtype: list
      """
      tree = ET.fromstring(raw_xml)
      channel = tree.find('channel')
      results = []
      for child in channel.findall('item'):
         title = self.find_xml_attribute(child, 'title')
         date = self.find_xml_attribute(child, 'pubDate')
         magnet = self.find_xml_attribute(child, 'link')
         size = self.find_xml_attribute(child, 'size')
         files = self.find_xml_attribute(child, 'files')
         # Uploader is conventionally appended to the title as "-name";
         # take the last such match. (raw string avoids the \w escape warning)
         foundUploader = re.findall(r'-1? *\w*', title)
         if len(foundUploader) > 0:
            uploader = str(foundUploader[-1][1:])
         else:
            uploader = None
         seeders = 0
         peers = 0

         for elm in child.findall('{http://torznab.com/schemas/2015/feed}attr'):
            if elm.get('name') == 'seeders':
               seeders = elm.get('value')
            if elm.get('name') == 'peers':
               peers = elm.get('value')

         # Humanize the size once per item, after scanning the attrs.
         # (The original did this inside the attr loop, so it was re-checked
         # per element and never ran for items with no torznab attrs.)
         if size != '' and representsInteger(size):
            size = humansize(int(size))

         logger.debug('Found torrent with info: \n\ttitle: {}\n\tmagnet: {}\n\tsize: {}\n\tdate: {}\
            \n\tseeders: {}\n\tpeers: {}'.format(title, magnet, size, date, seeders, peers))
         torrent = Torrent(title, magnet=magnet, size=size, uploader=uploader, date=date, seed_count=seeders, leech_count=peers)
         results.append(torrent)

      return results
Esempio n. 5
0
def main():
    """Generate big test files under var/ and benchmark WordCounter.

    Appends a timing table (one row per file size, one column per worker
    count) to test_result.txt.
    """
    # Honor CLI arguments when provided; otherwise fall back to the sample
    # file. (The original parsed sys.argv and then unconditionally
    # overwrote the result, making the CLI arguments dead code.)
    if len(sys.argv) > 2:
        from_file, to_file = sys.argv[1:3]
    else:
        from_file, to_file = "100lines.txt", "count_result.txt"
    # Generate the big test files in the parent directory's var folder
    # when running from the test directory.
    if os.path.dirname(__file__) in ["test"]:
        dir_of_bigfile = os.path.join("..", "var")
    else:
        dir_of_bigfile = "var"
    if not os.path.exists(dir_of_bigfile):
        os.mkdir(dir_of_bigfile)

    with open(from_file, "rb") as fp:
        s = fp.read()
    files = []
    for i in [2000, 10000, 20000, 100000, 200000]:
        fn = "{}thousandlines.txt".format(i // 10)
        ffn = os.path.join(dir_of_bigfile, fn)
        files.append(ffn)
        if not os.path.exists(ffn):
            with open(ffn, "wb") as fp:
                fp.write(s * i)

    ps = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]  # worker counts to test
    pre = "{:8}" * (len(ps) + 1)
    title = ["size"] + ["{}ps".format(i) for i in ps]
    L = [pre.format(*title)]
    for i in files:
        size = os.path.getsize(i)
        ws = [WordCounter(i, to_file, p) for p in ps]
        for w in ws:  # run() for its side effects; it sets w.cost
            w.run()
        title = [humansize(size)] + [w.cost for w in ws]
        L.append(pre.format(*title))
        print("-" * 40)
    t = "cpu_count = {}, now = {}".format(cpu_count(), datetime.now())
    result = "\n".join([sys.version, t] + L + ["-" * 75, ""])
    print(result)
    with open("test_result.txt", "ab") as fp:
        fp.write(result.encode("utf-8"))
def countwords(from_file, to_file, workers=1):
    """Count words in from_file, write the result to to_file.

    With workers > 1, splits the file into byte ranges counted by one
    thread each; partial counters are collected via the module-level
    queue q and merged.

    :param from_file: path of the file to count
    :param to_file: path the result is written to
    :param workers: number of counting threads (default 1 = single pass)
    :return: elapsed time as a string like '1.2s'
    """
    start = time.time()
    f_size = os.path.getsize(from_file)
    if workers == 1:
        c = counter_single(from_file, f_size)
    else:
        res_list, threads = [], []
        for i in range(workers):
            # Multiply before dividing so the chunks tile the whole file:
            # the original f_size // workers * (i + 1) capped the last
            # chunk at f_size // workers * workers, silently dropping the
            # trailing f_size % workers bytes.
            p1 = f_size * i // workers
            p2 = f_size * (i + 1) // workers
            t = Thread(target=word_count, args=[from_file, p1, p2, f_size])
            t.start()
            threads.append(t)
        for t in threads:
            t.join()
        while not q.empty():
            res_list.append(q.get())
        c = reduce(operator.add, res_list)
    write_result(c, to_file)
    cost = '{:.1f}'.format(time.time() - start)
    size = humansize(f_size)
    tip = '\n{}File size: {}. Workers: {}. Cost time: {} seconds'
    # '\33[?25h' re-shows the terminal cursor
    print(tip.format('\33[?25h', size, workers, cost))
    return cost + 's'
Esempio n. 7
0
    def get_status(self, dld):
        """Build a status dict for download *dld*.

        Inspects the on-disk file and the worker process to derive status,
        progress, speed, time_left and eta. May advance dld.status
        (STARTING -> DOWNLOADING -> COMPLETED) and persist it via
        dld.save().
        """
        # default skeleton, overridden by the state handling below
        status = {
            'id': dld.id,
            'info': model_to_dict(dld),
            'status': 'created',
            'progress': 0,
            'speed': '',
            'time_left': '',
            'eta': '',
        }

        # the full path to the file
        fullpath = self.__get_fullpath(dld)
        elapsed = datetime.datetime.now() - dld.started_at
        elapsed = elapsed.days * 86400 + elapsed.seconds  # whole seconds

        # STARTING: promote to DOWNLOADING once the file appears; report
        # an error if nothing showed up within 5 seconds.
        if dld.status == consts.STATUS_STARTING:
            if os.path.exists(fullpath):
                dld.status = consts.STATUS_DOWNLOADING
                dld.save()
            else:
                if elapsed > 5:
                    status['status'] = 'error'
                else:
                    status['status'] = 'starting'

        # DOWNLOADING: compare on-disk size with the expected size and
        # inspect the worker process to compute speed / ETA.
        if dld.status == consts.STATUS_DOWNLOADING:

            # first check file
            if os.path.exists(fullpath):
                statinfo = os.stat(fullpath)
                currsize = statinfo.st_size
                if currsize == dld.filesize:
                    dld.status = consts.STATUS_COMPLETED
                    dld.save()
                else:

                    if dld.filesize > 0:
                        status['progress'] = '{0:2.1f}'.format(
                            currsize / dld.filesize * 100)

                    # now check the downloader process
                    try:
                        p = psutil.Process(dld.pid)
                        if p.status() in (psutil.STATUS_STOPPED,
                                          psutil.STATUS_ZOMBIE,
                                          psutil.STATUS_DEAD):
                            raise RuntimeError('Invalid process status: ' +
                                               p.status())

                        # download in progress
                        status['status'] = 'downloading'

                        # calculate download speed
                        if elapsed > 0:

                            # previous samples persisted as JSON on the model
                            # (idiom fix: was "not dld.progress is None")
                            statuses = []
                            if dld.progress is not None:
                                statuses = json.loads(dld.progress)

                            # keep only samples inside the speed-calc window,
                            # then append the latest sample
                            statuses = [
                                s for s in statuses
                                if 0 < elapsed - s['elapsed'] <=
                                consts.SPEED_CALC_TIME_RANGE
                            ]
                            statuses.append({
                                'elapsed': elapsed,
                                'size': currsize,
                            })

                            # speed = bytes gained since the oldest sample
                            oldest = statuses[0]
                            if elapsed != oldest['elapsed']:

                                # calculate speed
                                speed = (currsize - oldest['size']) / (
                                    elapsed - oldest['elapsed'])
                                status['speed'] = utils.humansize(speed) + '/s'

                                # estimate remaining time and ETA
                                if dld.filesize > 0 and speed > 0:
                                    left_bytes = dld.filesize - currsize
                                    left_seconds = left_bytes / speed
                                    delta = datetime.timedelta(0, left_seconds)
                                    # drop microseconds from the timedelta repr
                                    status['time_left'] = str(delta).split(
                                        '.')[0]
                                    status[
                                        'eta'] = '{0:%Y-%m-%d %H:%M:%S}'.format(
                                            datetime.datetime.now() + delta)

                            # persist samples for the next poll
                            dld.progress = json.dumps(statuses)
                            dld.save()

                    except Exception as ex:
                        print('Reporting error during download', dld.filename,
                              ex)
                        status['status'] = 'error'
                        status['error'] = repr(ex)

            else:
                print('File does not exist during download', dld.filename)
                status['status'] = 'error'

        # terminal states map directly onto the reported status
        if dld.status == consts.STATUS_ERROR:
            status['status'] = 'error'
        if dld.status == consts.STATUS_COMPLETED:
            status['status'] = 'completed'
            status['progress'] = 100
        if dld.status == consts.STATUS_PROCESSED:
            status['status'] = 'processed'
            status['progress'] = 100
        if dld.status == consts.STATUS_CANCELLED:
            status['status'] = 'cancelled'
            status['progress'] = 0

        # done
        return status