def download_file(self, url, name, path, flag='', timeout=200):
    """Download ``url`` into directory ``path`` while printing progress.

    The body is streamed to disk chunk by chunk instead of being collected
    in an in-memory buffer first, so memory use stays bounded by the chunk
    size regardless of how large the download is.

    :param url: address fetched through ``self.session``.
    :param name: requested file name; it is passed through
        ``clean_filename`` before being joined onto ``path``.
    :param path: directory the file is written into.
    :param flag: prefix printed in front of the progress line and of the
        final log entry.
    :param timeout: timeout forwarded to ``self.session.get``.

    If the transfer fails part-way, the truncated file is removed and the
    original exception propagates to the caller.
    """
    chunk_size = 1024 * 100
    target = os.path.join(path, clean_filename(name))

    start = time.time()
    r = self.session.get(url, stream=True, timeout=timeout)
    # A missing Content-Length header yields 0, which selects the
    # "bytes so far" progress format below instead of a percentage.
    length = int(r.headers.get('content-length', 0))
    save = 0.0  # float so the ratio/speed divisions are true divisions
    speed = 0
    finished = False
    try:
        with open(target, 'wb') as fd:
            for chunk in r.iter_content(chunk_size):
                fd.write(chunk)

                # progress bar
                save += len(chunk)
                elapsed = time.time() - start
                if elapsed != 0:
                    speed = save / elapsed
                if length:
                    size = sizeof_fmt(length)
                    rate = '{}%'.format(int(save / length * 100))
                    progress = '\r{} {} {} {}/s'.format(
                        flag, rate, size, sizeof_fmt(speed))
                else:
                    progress = '\r{} {} {}/s'.format(
                        flag, sizeof_fmt(save), sizeof_fmt(speed))
                sys.stdout.write(progress)
                # Flush after writing so the line just produced is the one
                # that becomes visible.
                sys.stdout.flush()
        finished = True
    finally:
        # Release the streamed connection even when the transfer failed.
        r.close()
        if not finished and os.path.exists(target):
            # Do not leave a truncated download behind.
            os.remove(target)

    sys.stdout.flush()
    d = '\r{} {}|{} {:.1f}s {}'.format(flag, sizeof_fmt(save), sizeof_fmt(length), time.time() - start, r.url)
    logger.info(d)
def __init__(self, username='', password=''):
    """Create the HTTP session and, when credentials are given, log in.

    :param username: account name; login is attempted only when both
        ``username`` and ``password`` are non-empty.
    :param password: account password.

    ``self.auth`` is always defined after construction: it is ``True`` only
    when a login was attempted and ``self.login`` returned a truthy value,
    and ``False`` otherwise (including when no credentials were supplied).
    """
    self.session = requests.Session()
    # Default first, so the attribute exists on anonymous instances too.
    self.auth = False
    if username and password:
        self.auth = bool(self.login(username, password))
        if self.auth:
            logger.info(u'%s登录成功', self)
        else:
            logger.info(u'%s登录失败!', self)
def save_doc(self, data):
    """
    Record the document, then store its note and attachments in a
    directory derived from the document title.

    data = {
        'title': 'title',
        'note': 'note',
        'files': [('url', 'name'),...]
    }

    The directory is whatever ``mkdir_p`` returns for the cleaned title.
    The note is written with ``self.write_note`` only when it is non-empty
    (string notes are stripped first). Every attachment is fetched with
    ``self.download_file`` using an ``(index/total)`` progress flag.
    """
    FILENAMES.setdefault(self.NAME, []).append(data)

    title = data['title'].strip()
    path = mkdir_p(clean_filename(title))
    logger.info(u' → {}'.format(title))

    raw_note = data['note']
    # Only string notes are stripped; any other value is used as given.
    note = raw_note.strip() if isinstance(raw_note, basestring) else raw_note
    if note:
        self.write_note(note, path)
        logger.debug(u'通知: %s' % guess_abstract(note))

    attachments = data['files']
    total = len(attachments)
    for position, (url, name) in enumerate(attachments, 1):
        self.download_file(url, name, path, '(%s/%s)' % (position, total))
def mail_parser(self, mail_box_id):
    """Fetch one mail page and extract its title, body text and attachments.

    :param mail_box_id: value sent as the ``mailBoxId`` query parameter to
        ``self.MailDetail_URL``.
    :returns: ``{'title': ..., 'note': ..., 'files': [(url, name), ...]}``
        where each url is ``self.DOWNLOAD_URL`` plus an ``id`` query
        parameter taken from the attachment link's ``onclick`` attribute.
    :raises ValueError: when the page has fewer than six ``tr td`` cells,
        so the expected fields cannot be unpacked.
    """
    mail_detail = self.session.get(self.MailDetail_URL, params={'mailBoxId': mail_box_id})
    mail_content = PyQuery(mail_detail.text)
    tds = mail_content('tr td')

    try:
        # Only topic, att and content are used below; the other cells are
        # unpacked purely to enforce the six-cell layout.
        (topic, _date, _sender, _addr, att, content), rest = tds[:6], tds[6:]
    except ValueError:
        # Log with a real format string and stop here: continuing would
        # only fail later on the names that were never bound.
        logger.error('mailBoxId:%s expected at least 6 cells, found %d\n%s',
                     mail_box_id, len(tds), mail_detail.text)
        raise

    if rest:
        # With extra cells the attachment cell is taken from the sixth
        # position and the body from the first extra cell.
        att = content
        content = rest[0]
        logger.info('mailBoxId:%s\n%s' % (mail_box_id, mail_detail.text))

    files = []
    for d in att:
        link = d.find('a')
        file_id = re.search(r"Id:'(.+)'", link.get('onclick')).group(1)
        files.append(('%s?id=%s' % (self.DOWNLOAD_URL, file_id), link.text))

    # Exact-type comparison kept on purpose: isinstance() would also match
    # subclasses of etree._Element and change which branch runs.
    # NOTE(review): presumably this separates plain etree elements from
    # lxml.html elements (which provide text_content()) - confirm.
    if type(content) == etree._Element:
        note = content.text
    else:
        note = content.text_content()

    return {
        'title': topic.text,
        # An element without text yields None; treat that as an empty note.
        'note': (note or u'').strip(),
        'files': files,
    }
def downloadfile_info(self, count):
    """Log how many new files were found for this instance.

    :param count: number of new files; any value that is not greater than
        zero produces the "no new files" message.
    """
    if count > 0:
        message = u'%s有%d个新文件' % (self, count)
    else:
        message = u'%s没有新文件' % self
    logger.info(message)