def __init__(self, ui):
    """Keep a reference to the UI object and build a blocking Vinanti client.

    SSL certificate verification is enabled only on POSIX platforms.
    """
    self.ui = ui
    ssl_verify = os.name == 'posix'
    self.vnt = Vinanti(block=True,
                       hdrs={'User-Agent': self.ui.user_agent},
                       verify=ssl_verify)
def test_delete(self):
    """Issue two non-blocking DELETE requests with form data."""
    targets = ['http://httpbin.org/delete', 'http://httpbin.org/delete']
    client = Vinanti(block=False)
    client.delete(targets, onfinished=hello, hdrs=self.hdr,
                  data={'garfield': 'peanuts'})
def test_patch(self):
    """Issue one blocking PATCH request with form data."""
    endpoint = 'http://httpbin.org/patch'
    client = Vinanti(block=True)
    client.patch(endpoint, onfinished=hello, hdrs=self.hdr,
                 data={'gotham city': 'rajnagar'})
def test_put(self):
    """Issue one blocking PUT request with form data."""
    endpoint = 'http://httpbin.org/put'
    client = Vinanti(block=True)
    client.put(endpoint, onfinished=hello, hdrs=self.hdr,
               data={'calvin': 'hobbes'})
def test_delete(self):
    """Issue one blocking DELETE request with form data."""
    endpoint = 'http://httpbin.org/delete'
    client = Vinanti(block=True)
    client.delete(endpoint, onfinished=hello, hdrs=self.hdr,
                  data={'garfield': 'peanuts'})
def test_patch(self):
    """Issue two non-blocking PATCH requests with form data."""
    targets = ['http://httpbin.org/patch', 'http://httpbin.org/patch']
    client = Vinanti(block=False)
    client.patch(targets, onfinished=hello, hdrs=self.hdr,
                 data={'gotham city': 'rajnagar'})
def __init__(self, base_url=None, lang='en', wait=None, episode_summary=False,
             search_and_grab=True, backend=None, hdrs=None):
    """Set up the TVDB scraper and its non-blocking HTTP client.

    Args:
        base_url: TVDB site root; defaults to 'https://www.thetvdb.com'.
        lang: language code used for lookups.
        wait: optional inter-request delay (int/float) passed to Vinanti.
        episode_summary: whether episode summaries should be fetched.
        search_and_grab: search-first workflow toggle.
        backend: search backend identifier, stored on backend_search.
        hdrs: optional HTTP headers; defaults to a Mozilla User-Agent.
    """
    self.base_url = base_url if base_url else 'https://www.thetvdb.com'
    self.language = lang
    self.hdrs = hdrs if hdrs else {'User-Agent': 'Mozilla/5.0'}
    # SSL verification only on POSIX, mirroring the rest of the project.
    verify = os.name == 'posix'
    # Idiom: one isinstance with a tuple replaces the or-chain of isinstance calls.
    if isinstance(wait, (int, float)):
        self.vnt = Vinanti(block=False, hdrs=self.hdrs, wait=wait,
                           timeout=10, verify=verify)
    else:
        self.vnt = Vinanti(block=False, hdrs=self.hdrs,
                           timeout=10, verify=verify)
    self.fanart_list = []
    self.poster_list = []
    self.banner_list = []
    self.final_dict = {}
    self.time = time.time()
    self.ep_summary = episode_summary
    self.search_and_grab = search_and_grab
    # NOTE(review): Backend receives the raw `hdrs` argument (possibly None),
    # not the defaulted self.hdrs — confirm Backend handles None itself.
    self.backend = Backend(hdrs)
    self.backend_search = backend
def test_put(self):
    """Issue two non-blocking PUT requests with form data."""
    targets = ['http://httpbin.org/put', 'http://httpbin.org/put']
    client = Vinanti(block=False)
    client.put(targets, onfinished=hello, hdrs=self.hdr,
               data={'calvin': 'hobbes'})
def test_post(self):
    """POST a tuple-of-pairs payload (repeated key 'moe') to two endpoints."""
    targets = ['http://httpbin.org/post', 'http://httpbin.org/post']
    client = Vinanti(block=False)
    client.post(targets, onfinished=hello, hdrs=self.hdr,
                data=(('moe', 'curly'), ('moe', 'larry')))
def test_post_more(self):
    """POST a dict payload to two endpoints, non-blocking."""
    targets = ['http://httpbin.org/post', 'http://httpbin.org/post']
    client = Vinanti(block=False)
    payload = {'yotsubato': 'aria', 'mushishi': 'kino'}
    client.post(targets, onfinished=hello, hdrs=self.hdr, data=payload)
def test_crawl_urllib(self):
    """Crawl the Python reference docs with the urllib backend."""
    client = Vinanti(block=False, backend='urllib', max_requests=5,
                     hdrs=hdr, session=True, loop_forever=False, wait=0.2)
    start_url = 'https://docs.python.org/3/reference/index.html'
    client.crawl(start_url,
                 onfinished=partial(hello, client, 'test_crawl_urllib'))
def test_get_params(self):
    """GET two endpoints with query-string parameters."""
    targets = ['http://httpbin.org/get', 'http://httpbin.org/get']
    client = Vinanti(block=False)
    query = {'billoo': 'diamond comics', 'dhruva': 'raj comics'}
    client.get(targets, onfinished=hello, hdrs=self.hdr, params=query)
def test_session(self):
    """Queue three grouped GET tasks, then start them together."""
    client = Vinanti(block=self.block, method='GET', onfinished=hello,
                     hdrs=self.hdr, group_task=True)
    client.get('http://www.google.com', out='/tmp/1.html')
    client.add('http://www.wikipedia.org', out='/tmp/2.html')
    client.add('http://www.google.com', out='/tmp/3.html')
    client.start()
def test_proxies(self, test_url=None):
    """HEAD-probe each proxy in self.proxies against a target URL."""
    client = Vinanti(block=False, multiprocess=True, timeout=30)
    target = test_url if test_url else "http://osu.ppy.sh/legal/terms"
    for proxy in self.proxies:
        callback = partial(self.__test_proxies_callback__, client, proxy)
        client.head(target,
                    hdrs={"User-Agent": self.get_useragent()},
                    proxies={"http": "http://{}/".format(proxy)},
                    wait=0.5,
                    onfinished=callback)
def urls_to_ids(self, urls: list):
    """Resolve beatmap URLs into song ids.

    Old-style '/b/' links are rewritten to '/beatmaps/' and resolved with a
    blocking HEAD request (the callback records the id); direct links are
    parsed locally via get_song_id() and appended to self.song_ids.
    """
    vnt = Vinanti(block=True, hdrs={"User-Agent": "Mozilla/5.0"}, timeout=10)
    for url in urls:
        new_url = re.sub(r'/b/', '/beatmaps/', url)
        # Idiom fix: `new_url != url` instead of `not new_url == url`.
        if new_url != url:
            vnt.head(new_url, onfinished=self.urls_to_ids_callback)
        else:
            self.song_ids.append(get_song_id(url))
    del vnt
def test_crawl_limit_urllib(self):
    """Crawl the Python docs root with the urllib backend, depth-limited to 1."""
    client = Vinanti(block=False, backend='urllib', max_requests=5,
                     hdrs=hdr, session=True, loop_forever=False, wait=0.2)
    start_url = 'https://docs.python.org/3/'
    client.crawl(start_url, depth_allowed=1,
                 onfinished=partial(hello, client, 'test_crawl_limit_urllib'))
def __prepare_request__(self, hdrs_dict, auth_tuple, data_tuple, files_data,
                        proxies, args):
    """Build the Vinanti client from parsed CLI args and dispatch requests.

    Routes to __process_files_urls__ when input files were supplied,
    otherwise to __final_request__ for a single request.
    """
    # Idiom fix: direct negation replaces the four-line if/else assignment.
    verify = not args.no_verify
    logger.debug('verify={}; cookie-unsafe={}'.format(
                 verify, args.cookie_unsafe))
    vnt = Vinanti(block=False, backend=args.backend, hdrs=hdrs_dict,
                  wait=args.wait, max_requests=args.max_requests,
                  continue_out=args.resume_download, verify=verify,
                  auth=auth_tuple, data=data_tuple,
                  cookie_unsafe=args.cookie_unsafe, charset=args.charset,
                  timeout=args.timeout, proxies=proxies, files=files_data,
                  session=args.accept_cookies)
    if args.input_files:
        self.__process_files_urls__(vnt, args)
    else:
        self.__final_request__(vnt, hdrs_dict, auth_tuple, data_tuple,
                               files_data, proxies, args)
def request_cookie(self):
    """POST login credentials to the osu! session endpoint.

    The response is handed to set_cookie_callback, which stores the cookie.
    NOTE(review): the 'authorization' header embeds hard-coded Basic
    credentials in source — consider moving them to configuration.
    """
    login_headers = {
        'authorization': "Basic eGVua2luZzoxOTI4Mzc0NjUwYXNk",
        'content-type': "application/x-www-form-urlencoded",
        'charset': "UTF-8",
    }
    credentials = {'username': self['username'],
                   'password': self['password']}
    session_client = Vinanti(block=True, hdrs={"User-Agent": "Mozilla/5.0"},
                             multiprocess=True, session=True, timeout=60)
    session_client.post('https://osu.ppy.sh/session',
                        onfinished=self.set_cookie_callback,
                        hdrs=login_headers, data=credentials)
def test_auth_noblock(self):
    """Group a basic-auth GET and a digest-auth request, then start."""
    client = Vinanti(block=False, log=logval, group_task=True)
    client.get(url1, onfinished=hello, hdrs=hdr,
               auth=('user-basic', 'password-basic'))
    client.add(url2, onfinished=hello, hdrs=hdr,
               auth_digest=('user-digest', 'password-digest'))
    client.start()
def test_proxy_noblock(self):
    """Group a proxied GET and a proxied POST, then start."""
    client = Vinanti(block=False, log=logval, group_task=True)
    client.get('http://www.httpbin.org/ip', onfinished=hello,
               hdrs=hdr, proxies=proxies)
    client.add('http://www.httpbin.org/post', method='POST',
               data={'moe': 'curly'}, onfinished=hello,
               hdrs=hdr, proxies=proxies)
    client.start()
def __init__(self, song_urls, config_path=None, download_path=None,
             songs_path=None, auto_start=None, multiprocess=None,
             use_proxy=None):
    """Initialise the downloader: config, headers, proxy pool, HTTP client.

    Constructor arguments override values loaded from the config file, the
    paths are resolved strictly (raising if missing), a Vinanti client is
    tuned according to multiprocess/proxy settings, and finally the given
    song URLs are resolved into ids via self.urls_to_ids().
    """
    self._config = Config(config_path)
    # Constructor args win over config-file values (None leaves config as-is,
    # presumably — TODO confirm Config.update semantics).
    self._config.update({
        'download_path': download_path,
        'songs_path': songs_path,
        'use_proxy': use_proxy
    })
    self._header = {
        "User-Agent": "Mozilla/5.0",
        "Accept-Language": "en-US;q=0.9,en;q=0.8",
        "Accept": "text/html,application/xhtml+xml,application/xml",
        "Accept-Encoding": "gzip, deflate, br",
        "Cookie": self._config.get_cookie()
    }
    # Defaults to 6 workers when no explicit count is given.
    self._multiprocess = multiprocess if multiprocess else 6
    self._proxy = Proxy(
        proxy_numbers=100) if self._config['use_proxy'] else None
    # resolve(strict=True) raises FileNotFoundError if the path is missing.
    self.download_path = Path(
        self._config['download_path']).resolve(strict=True)
    self.songs_path = Path(self._config['songs_path']).resolve(strict=True)
    vnt_args = {"wait": 3, "timeout": 30, "max_requests": 5, "log": False}
    # NOTE(review): _multiprocess is always truthy here (falls back to 6),
    # so this branch always fires — confirm that is intended.
    if self._multiprocess:
        vnt_args.update({
            "multiprocess": True,
            "max_requests": self._multiprocess + 1
        })
    # Proxied runs use a shorter wait but a longer timeout.
    if self._proxy:
        vnt_args.update({"wait": 1, "timeout": 60})
    self.vnt = Vinanti(**vnt_args)
    # Ids already present on disk, used to skip duplicate downloads.
    self.existed_ids = frozenset(
        get_existing_ids([self.songs_path, self.download_path]))
    self.song_ids = []
    self.auto_start = auto_start
    self.urls_to_ids(song_urls)
def test_save_file_aio(self):
    """Download three pages to files using the aiohttp backend."""
    client = Vinanti(block=False, backend='aiohttp')
    client.get('http://www.google.com', onfinished=hello,
               hdrs=self.hdr, out='/tmp/1_aio.html')
    client.get('http://www.wikipedia.org', onfinished=hello,
               hdrs=self.hdr, out='/tmp/2_aio.html')
    client.get('http://www.google.com', onfinished=hello,
               hdrs=self.hdr, out='/tmp/3_aio.html')
def test_save_file(self):
    """Download three pages to files with the default backend."""
    client = Vinanti(block=False)
    client.get('http://www.google.com', onfinished=hello,
               hdrs=self.hdr, out='/tmp/1.html')
    client.get('http://www.wikipedia.org', onfinished=hello,
               hdrs=self.hdr, out='/tmp/2.html')
    client.get('http://www.google.com', onfinished=hello,
               hdrs=self.hdr, out='/tmp/3.html')
def test_no_async(self):
    """Exercise the blocking API: GET, POST, GET, then HEAD-via-GET."""
    client = Vinanti(block=True, hdrs=hdr)
    resp = client.get('http://www.google.com')
    print(resp.info)
    resp = client.post('http://httpbin.org/post', data={'hello': 'world'})
    print(resp.html)
    resp = client.get('http://www.wikipedia.org')
    print(resp.info)
    resp = client.get('http://httpbin.org/get', method='HEAD')
    print(resp.info)
class CustomRead:
    """Serve archived library items (HTML/PDF/PNG/media) over HTTP.

    Two class-level Vinanti clients are shared by all methods: vnt_noblock
    fetches page resources asynchronously, vnt performs blocking fetches.
    VIDEO_ID_DICT maps short-lived public video ids to media paths and is
    persisted to CACHE_FILE via pickle.
    """

    # MIME types the browser can render directly (no attachment disposition).
    readable_format = [
        'text/plain', 'text/html', 'text/htm', 'text/css',
        'application/xhtml+xml', 'application/xml', 'application/json',
    ]
    # MIME types that go through the custom HTML formatting pipeline.
    mtype_list = [
        'text/htm', 'text/html', 'text/plain'
    ]
    vnt_noblock = Vinanti(block=False, hdrs={'User-Agent': settings.USER_AGENT},
                          backend=settings.VINANTI_BACKEND,
                          max_requests=settings.VINANTI_MAX_REQUESTS)
    vnt = Vinanti(block=True, hdrs={'User-Agent': settings.USER_AGENT})
    fav_path = settings.FAVICONS_STATIC
    # uid -> [media_path, timestamp]; newest entries kept at the front.
    VIDEO_ID_DICT = OrderedDict()
    CACHE_FILE = os.path.join(settings.TMP_LOCATION, 'cache')

    @classmethod
    def get_archived_file(cls, usr, url_id, mode='html', req=None,
                          return_path=False):
        """Return an HTTP response for an archived item in the given mode.

        mode is one of 'html' (default), 'pdf', 'png', or 'archive' (media);
        'archive' may switch to streaming when a non-document media file is
        found next to the archived page.
        """
        qset = Library.objects.filter(usr=usr, id=url_id)
        streaming_mode = False
        if not os.path.exists(settings.TMP_LOCATION):
            os.makedirs(settings.TMP_LOCATION)
        if qset:
            row = qset[0]
            media_path = row.media_path
            if mode in ['pdf', 'png'] and media_path:
                # Swap the archived file's extension for the requested format.
                fln, ext = media_path.rsplit('.', 1)
                if mode == 'pdf':
                    media_path = fln + '.pdf'
                elif mode == 'png':
                    media_path = fln + '.png'
            elif mode == 'archive' and media_path:
                # Look for a sibling media file (anything not pdf/png/htm/html).
                mdir, _ = os.path.split(media_path)
                filelist = os.listdir(mdir)
                mlist = []
                extset = set(['pdf', 'png', 'htm', 'html'])
                for fl in filelist:
                    ext = fl.rsplit('.', 1)
                    if ext and ext[-1] not in extset:
                        mlist.append(os.path.join(mdir, fl))
                for mfile in mlist:
                    # First non-empty media file wins; switch to streaming.
                    if os.path.isfile(mfile) and os.stat(mfile).st_size:
                        media_path = mfile
                        streaming_mode = True
                        break
                if streaming_mode and req:
                    # Respect the user's media_streaming preference.
                    qlist = UserSettings.objects.filter(usrid=usr)
                    if qlist and not qlist[0].media_streaming:
                        streaming_mode = False
            if media_path and os.path.exists(media_path):
                mtype = guess_type(media_path)[0]
                if not mtype:
                    mtype = 'application/octet-stream'
                # Derive a download filename from the row title + extension;
                # avoid doubling the extension when the title already has it.
                ext = media_path.rsplit('.')[-1]
                if ext:
                    filename = row.title + '.' + ext
                    if '.' in row.title:
                        file_ext = row.title.rsplit('.', 1)[-1]
                        if ext == file_ext:
                            filename = row.title
                else:
                    filename = row.title + '.bin'
                if mtype in ['text/html', 'text/htm']:
                    data = cls.format_html(row, media_path)
                    return HttpResponse(data)
                elif streaming_mode:
                    # Mint a unique public uid for the media path and persist
                    # the id map so other workers/restarts can serve it.
                    if os.path.isfile(cls.CACHE_FILE):
                        with open(cls.CACHE_FILE, 'rb') as fd:
                            cls.VIDEO_ID_DICT = pickle.load(fd)
                    uid = str(uuid.uuid4())
                    uid = uid.replace('-', '')
                    while uid in cls.VIDEO_ID_DICT:
                        logger.debug("no unique ID, Generating again")
                        uid = str(uuid.uuid4())
                        uid = uid.replace('-', '')
                        time.sleep(0.01)
                    cls.VIDEO_ID_DICT.update({uid: [media_path, time.time()]})
                    cls.VIDEO_ID_DICT.move_to_end(uid, last=False)
                    # Cap the public list; oldest entry is evicted.
                    if len(cls.VIDEO_ID_DICT) > settings.VIDEO_PUBLIC_LIST:
                        cls.VIDEO_ID_DICT.popitem()
                    with open(cls.CACHE_FILE, 'wb') as fd:
                        pickle.dump(cls.VIDEO_ID_DICT, fd)
                    if return_path:
                        title_slug = slugify(row.title, allow_unicode=True)
                        return '{}/getarchivedvideo/{}-{}'.format(
                            usr.username, title_slug, uid)
                    else:
                        return cls.get_archived_video(req, usr.username, uid)
                else:
                    # Plain file download.
                    response = FileResponse(open(media_path, 'rb'))
                    # Browsers handle webm better than the matroska type.
                    mtype = 'video/webm' if mtype == 'video/x-matroska' else mtype
                    response['mimetype'] = mtype
                    response['content-type'] = mtype
                    response['content-length'] = os.stat(media_path).st_size
                    filename = filename.replace(' ', '.')
                    logger.info('{} , {}'.format(filename, mtype))
                    if not cls.is_human_readable(mtype) and not streaming_mode:
                        response['Content-Disposition'] = 'attachment; filename="{}"'.format(filename)
                    return response
            else:
                return HttpResponse('<html>File has not been archived in this format</html>')
        else:
            return HttpResponse(status=404)

    @classmethod
    def get_archived_video(cls, request, username, video_id):
        """Stream a video previously registered in VIDEO_ID_DICT.

        Honors HTTP Range requests (206 partial content); expired or unknown
        ids get a 404.
        """
        if video_id in cls.VIDEO_ID_DICT:
            media_path, ltime = cls.VIDEO_ID_DICT.get(video_id)
            logger.debug('{} {}'.format(media_path, ltime))
            # Ids expire after VIDEO_ID_EXPIRY_LIMIT hours.
            if time.time() - ltime <= settings.VIDEO_ID_EXPIRY_LIMIT*3600:
                if os.path.isfile(media_path):
                    mtype = guess_type(media_path)[0]
                    if not mtype:
                        mtype = 'application/octet-stream'
                    range_header = request.META.get('HTTP_RANGE', '').strip()
                    range_match = settings.RANGE_REGEX.match(range_header)
                    size = os.stat(media_path).st_size
                    if range_match:
                        first_byte, last_byte = range_match.groups()
                        first_byte = int(first_byte) if first_byte else 0
                        last_byte = int(last_byte) if last_byte else size - 1
                        if last_byte >= size:
                            last_byte = size - 1
                        length = last_byte - first_byte + 1
                        response = StreamingHttpResponse(
                            RangeFileResponse(open(media_path, 'rb'),
                                              offset=first_byte,
                                              length=length),
                            status=206, content_type=mtype
                        )
                        response['Content-Length'] = str(length)
                        response['Content-Range'] = 'bytes {}-{}/{}'.format(first_byte, last_byte, size)
                    else:
                        response = StreamingHttpResponse(FileResponse(open(media_path, 'rb')))
                        response['content-length'] = size
                    mtype = 'video/webm' if mtype == 'video/x-matroska' else mtype
                    response['content-type'] = mtype
                    response['mimetype'] = mtype
                    response['Accept-Ranges'] = 'bytes'
                    return response
        return HttpResponse(status=404)

    @classmethod
    def generate_archive_media_playlist(cls, server, usr, directory):
        """Build an M3U playlist of streamable media in a directory.

        Registers a public uid for every streamable row, writes the playlist
        to a pickled temp file, and returns the playlist URL path.
        """
        qset = Library.objects.filter(usr=usr, directory=directory)
        pls_txt = '#EXTM3U\n'
        extset = set(['pdf', 'png', 'htm', 'html'])
        if not os.path.exists(settings.TMP_LOCATION):
            os.makedirs(settings.TMP_LOCATION)
        if os.path.isfile(cls.CACHE_FILE):
            with open(cls.CACHE_FILE, 'rb') as fd:
                cls.VIDEO_ID_DICT = pickle.load(fd)
        for row in qset:
            streaming_mode = False
            media_path = row.media_path
            media_element = row.media_element
            title = row.title
            if media_path and media_element:
                # Same sibling-media scan as get_archived_file.
                mdir, _ = os.path.split(media_path)
                filelist = os.listdir(mdir)
                mlist = []
                for fl in filelist:
                    ext = fl.rsplit('.', 1)
                    if ext and ext[-1] not in extset:
                        mlist.append(os.path.join(mdir, fl))
                for mfile in mlist:
                    if os.path.isfile(mfile) and os.stat(mfile).st_size:
                        media_path = mfile
                        streaming_mode = True
                        break
            if media_path and os.path.exists(media_path):
                mtype = guess_type(media_path)[0]
                if not mtype:
                    mtype = 'application/octet-stream'
                if streaming_mode:
                    uid = str(uuid.uuid4())
                    uid = uid.replace('-', '')
                    while uid in cls.VIDEO_ID_DICT:
                        logger.debug("no unique ID, Generating again")
                        uid = str(uuid.uuid4())
                        uid = uid.replace('-', '')
                        time.sleep(0.01)
                    cls.VIDEO_ID_DICT.update({uid: [media_path, time.time()]})
                    cls.VIDEO_ID_DICT.move_to_end(uid, last=False)
                    if len(cls.VIDEO_ID_DICT) > settings.VIDEO_PUBLIC_LIST:
                        cls.VIDEO_ID_DICT.popitem()
                    title_slug = slugify(title, allow_unicode=True)
                    return_path = '{}/{}/getarchivedvideo/{}-{}'.format(server, usr.username, title_slug, uid)
                    pls_txt = pls_txt+'#EXTINF:0, {0}\n{1}\n'.format(title, return_path)
        with open(cls.CACHE_FILE, 'wb') as fd:
            pickle.dump(cls.VIDEO_ID_DICT, fd)
        # Playlist itself gets its own uid-named temp file.
        uid = str(uuid.uuid4())
        uid = uid.replace('-', '')
        plfile = os.path.join(settings.TMP_LOCATION, uid)
        if not os.path.isfile(plfile):
            with open(plfile, 'wb') as fd:
                pickle.dump(pls_txt, fd)
        pls_path = '/{}/getarchivedplaylist/{}/{}'.format(usr.username, directory, uid)
        logger.debug(pls_path)
        return pls_path

    @classmethod
    def read_customized(cls, usr, url_id):
        """Return the reader-mode (custom HTML) response for a library row.

        Falls back to fetching the live URL via get_content() when no local
        archive exists.
        """
        qlist = Library.objects.filter(usr=usr, id=url_id).select_related()
        data = b"<html>Not Available</html>"
        mtype = 'text/html'
        if qlist:
            row = qlist[0]
            media_path = row.media_path
            if media_path and os.path.exists(media_path):
                mtype = guess_type(media_path)[0]
                if mtype in cls.mtype_list:
                    data = cls.format_html(row, media_path, custom_html=True)
                if mtype == 'text/plain':
                    # Reader output is HTML even for plain-text sources.
                    mtype = 'text/html'
            elif row.url:
                data = cls.get_content(row, url_id, media_path)
        response = HttpResponse()
        response['mimetype'] = mtype
        response['content-type'] = mtype
        response.write(data)
        return response

    @classmethod
    def get_content(cls, row, url_id, media_path):
        """Fetch row.url live, archive it to media_path, and format it.

        Also triggers an async favicon fetch when no favicon exists yet.
        Returns the formatted HTML (empty string on failure).
        """
        data = ""
        req = cls.vnt.get(row.url)
        media_path_parent, _ = os.path.split(media_path)
        if not os.path.exists(media_path_parent):
            os.makedirs(media_path_parent)
        if req and req.content_type and req.html:
            mtype = req.content_type.split(';')[0].strip()
            if mtype in cls.mtype_list:
                content = req.html
                with open(media_path, 'w') as fd:
                    fd.write(content)
                data = cls.format_html(
                    row, media_path, content=content, custom_html=True
                )
                fav_nam = str(url_id) + '.ico'
                final_favicon_path = os.path.join(cls.fav_path, fav_nam)
                if not os.path.exists(final_favicon_path):
                    cls.get_favicon_link(req.html, row.url, final_favicon_path)
        return data

    @classmethod
    def format_html(cls, row, media_path, content=None, custom_html=False):
        """Rewrite an archived page's links/resources and return it as bytes.

        Scripts are stripped; images and CSS links are rewritten to local
        resource paths (sha256 of the absolute URL) and fetched
        asynchronously; other relative links are absolutized. With
        custom_html=True the result is passed through readability
        (Document) into the reader template.
        """
        media_dir, file_path = os.path.split(media_path)
        resource_dir = os.path.join(settings.ARCHIVE_LOCATION, 'resources', str(row.id))
        resource_link = '/{}/{}/{}/{}'.format(row.usr.username, row.directory, str(row.id), 'resources')
        if not os.path.exists(resource_dir):
            os.makedirs(resource_dir)
        if not content:
            content = ""
            with open(media_path, encoding='utf-8', mode='r') as fd:
                content = fd.read()
        soup = BeautifulSoup(content, 'lxml')
        # Remove all scripts from the archived copy.
        for script in soup.find_all('script'):
            script.decompose()
        url_path = row.url
        ourl = urlparse(url_path)
        ourld = ourl.scheme + '://' + ourl.netloc
        link_list = soup.find_all(['a', 'link', 'img'])
        for link in link_list:
            if link.name == 'img':
                lnk = link.get('src', '')
            else:
                lnk = link.get('href', '')
            if lnk and lnk != '#':
                if link.name == 'img' or (link.name == 'link' and '.css' in lnk):
                    # Mirror the resource locally under a sha256-derived name.
                    lnk = dbxs.format_link(lnk, url_path)
                    lnk_bytes = bytes(lnk, 'utf-8')
                    h = hashlib.sha256(lnk_bytes)
                    lnk_hash = h.hexdigest()
                    if link.name == 'img':
                        link['src'] = resource_link + '/' + lnk_hash
                        if custom_html:
                            link['class'] = 'card-img-top'
                    else:
                        lnk_hash = lnk_hash + '.css'
                        link['href'] = resource_link + '/' + lnk_hash
                    file_image = os.path.join(resource_dir, lnk_hash)
                    if not os.path.exists(file_image):
                        cls.vnt_noblock.get(lnk, out=file_image)
                        logger.info('getting file: {}, out: {}'.format(lnk, file_image))
                elif lnk.startswith('http'):
                    # Absolute anchors are left untouched.
                    pass
                else:
                    # Absolutize relative links against the original URL.
                    nlnk = dbxs.format_link(lnk, url_path)
                    if link.name == 'img':
                        link['src'] = nlnk
                        if custom_html:
                            link['class'] = 'card-img-top'
                    else:
                        link['href'] = nlnk
        if custom_html:
            ndata = soup.prettify()
            if soup.title:
                title = soup.title.text
            else:
                title = row.url.rsplit('/')[-1]
            # readability-lxml extraction; fall back to custom_soup when the
            # summary comes back empty.
            data = Document(ndata)
            data_sum = data.summary()
            if data_sum:
                nsoup = BeautifulSoup(data_sum, 'lxml')
                if nsoup.text.strip():
                    data = cls.custom_template(title, nsoup.prettify(), row)
                else:
                    data = cls.custom_soup(ndata, title, row)
            else:
                data = cls.custom_soup(ndata, title, row)
        else:
            data = soup.prettify()
        return bytes(data, 'utf-8')

    @staticmethod
    def custom_template(title, content, row):
        """Wrap reader content in the Bootstrap card template.

        The tab links (HTML/Original/PDF/PNG) point at the row's read views,
        or '#' when no row is given.
        """
        if row:
            base_dir = '/{}/{}/{}'.format(row.usr.username, row.directory, row.id)
            read_url = base_dir + '/read'
            read_pdf = base_dir + '/read-pdf'
            read_png = base_dir + '/read-png'
            read_html = base_dir + '/read-html'
        else:
            read_url = read_pdf = read_png = read_html = '#'
        template = """
        <html>
        <head>
        <meta charset="utf-8">
        <title>{title}</title>
        <link rel="stylesheet" href="/static/css/bootstrap.min.css">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <meta name="referrer" content="no-referrer">
        </head>
        <body>
        <div class="container-fluid">
        <div class="row">
        <div class="col-sm"></div>
        <div class="col-sm">
        <div class='card text-left bg-light mb-3'>
        <div class='card-header'>
        <ul class="nav nav-tabs card-header-tabs">
        <li class="nav-item">
        <a class="nav-link active" href="{read_url}">HTML</a>
        </li>
        <li class="nav-item">
        <a class="nav-link" href="{read_html}">Original</a>
        </li>
        <li class="nav-item">
        <a class="nav-link" href="{read_pdf}">PDF</a>
        </li>
        <li class="nav-item">
        <a class="nav-link" href="{read_png}">PNG</a>
        </li>
        </ul>
        </div>
        <div class='card-body'>
        <h5 class="card-title">{title}</h5>
        {content}
        </div>
        </div>
        </div>
        <div class="col-sm"></div>
        </div>
        </div>
        </body>
        </html>
        """.format(title=title, content=content, read_url=read_url,
                   read_pdf=read_pdf, read_png=read_png, read_html=read_html)
        return template

    @classmethod
    def custom_soup(cls, data, title, row=None):
        """Fallback reader formatting: rebuild text nodes into simple tags.

        Used when readability extraction yields nothing useful.
        """
        soup = BeautifulSoup(data, 'lxml')
        text_result = soup.find_all(text=True)
        final_result = []
        for elm in text_result:
            ntag = ''
            ptag = elm.parent.name
            if ptag == 'a':
                href = elm.parent.get('href')
                ntag = '<a href="{}">{}</a>'.format(href, elm)
            elif ptag in ['body', 'html', '[document]', 'img']:
                # Structural containers contribute no text of their own.
                pass
            elif ptag == 'p':
                ntag = '<p class="card-text">{}</p>'.format(elm)
            elif ptag == 'span':
                ntag = '<span class="card-text">{}</span>'.format(elm)
            elif '\n' in elm:
                ntag = '</br>';
            else:
                tag = elm.parent.name
                ntag = '<{tag}>{text}</{tag}>'.format(tag=tag, text=elm)
            if ntag:
                final_result.append(ntag)
        result = ''.join(final_result)
        # Collapse the break-markers produced above.
        result = re.sub(r'(</br>)+', '', result)
        content = cls.custom_template(title, result, row)
        return content

    @classmethod
    def get_favicon_link(cls, data, url_name, final_favicon_path):
        """Locate a page's favicon and fetch it asynchronously.

        Tries <link rel=icon>, then rel="shortcut icon", then any link href
        containing '.ico', finally /favicon.ico at the site root.
        """
        soup = BeautifulSoup(data, 'lxml')
        favicon_link = ''
        if not os.path.exists(final_favicon_path):
            links = soup.find_all('link')
            ilink = soup.find('link', {'rel': 'icon'})
            slink = soup.find('link', {'rel': 'shortcut icon'})
            if ilink:
                favicon_link = dbxs.format_link(ilink.get('href'), url_name)
            elif slink:
                favicon_link = dbxs.format_link(slink.get('href'), url_name)
            else:
                for i in links:
                    rel = i.get('href')
                    if (rel and (rel.endswith('.ico') or '.ico' in rel)):
                        favicon_link = dbxs.format_link(rel, url_name)
                if not favicon_link:
                    urlp = urlparse(url_name)
                    favicon_link = urlp.scheme + '://' + urlp.netloc + '/favicon.ico'
            if favicon_link:
                cls.vnt_noblock.get(favicon_link, out=final_favicon_path)

    @classmethod
    def is_human_readable(cls, mtype):
        """Return True when mtype is in readable_format."""
        human_readable = False
        if mtype in cls.readable_format:
            human_readable = True
        return human_readable
class ImportBookmarks:
    """Import Netscape-format bookmark exports into the Library model."""

    # Async client for favicon downloads.
    vnt = Vinanti(block=False, hdrs={'User-Agent': settings.USER_AGENT},
                  max_requests=settings.VINANTI_MAX_REQUESTS,
                  backend=settings.VINANTI_BACKEND)
    # Function-backend client for background processing tasks.
    vnt_task = Vinanti(block=False, group_task=False, backend='function',
                       multiprocess=settings.MULTIPROCESS_VINANTI,
                       max_requests=settings.MULTIPROCESS_VINANTI_MAX_REQUESTS)

    @classmethod
    def import_bookmarks(cls, usr, settings_row, import_file, mode='file'):
        """Parse a bookmark file/string and bulk-insert new rows for usr.

        Directories are created first, then links not already present
        (keyed by (directory, url)) are inserted via insert_in_bulk().
        """
        book_dict = cls.convert_bookmark_to_dict(import_file, mode=mode)
        if not os.path.exists(settings.FAVICONS_STATIC):
            os.makedirs(settings.FAVICONS_STATIC)
        insert_links_list = []
        insert_dir_list = []
        url_list = []
        for dirname in book_dict:
            # '/' and ':' are path/URL separators; sanitize them out of
            # directory names.
            if '/' in dirname or ':' in dirname:
                dirname = re.sub(r'/|:', '-', dirname)
            if dirname:
                qdir = Library.objects.filter(usr=usr, directory=dirname)
                if not qdir:
                    dirlist = Library(usr=usr, directory=dirname,
                                      timestamp=timezone.now())
                    insert_dir_list.append(dirlist)
        if insert_dir_list:
            Library.objects.bulk_create(insert_dir_list)
        uqlist = Library.objects.filter(usr=usr).only('directory', 'url')
        urlset = set()
        if uqlist:
            # Existing (directory, url) pairs, used for duplicate detection.
            urlset = set([(i.directory, i.url) for i in uqlist if i.url])
        for dirname, links in book_dict.items():
            for val in links:
                url, icon_u, add_date, title, descr = val
                url_tuple = (dirname, url)
                if url_tuple not in urlset:
                    logger.info(val)
                    # NOTE(review): dirname here is the un-sanitized key from
                    # book_dict, unlike the directory-creation loop above —
                    # confirm intended.
                    add_date = datetime.fromtimestamp(int(add_date))
                    lib = Library(usr=usr, directory=dirname, url=url,
                                  icon_url=icon_u, timestamp=add_date,
                                  title=title, summary=descr)
                    insert_links_list.append(lib)
                    url_list.append(url)
                else:
                    logger.info('{}-->{}; already exists'.format(dirname, url))
        cls.insert_in_bulk(usr, settings_row, insert_links_list, url_list)

    @classmethod
    def insert_in_bulk(cls, usr, settings_row, insert_links_list, url_list):
        """Bulk-create rows, assign media paths, fetch favicons, post-process.

        When the user's settings enable auto archive/summary/tag, each new
        row is handed to dbxs.process_add_url for background processing.
        """
        if insert_links_list:
            Library.objects.bulk_create(insert_links_list)
            qlist = Library.objects.filter(usr=usr, url__in=url_list)
            row_list = []
            for row in qlist:
                icon_url = row.icon_url
                row_id = row.id
                url = row.url
                if url:
                    row.media_path = cls.get_media_path(url, row_id)
                    final_favicon_path = os.path.join(
                        settings.FAVICONS_STATIC, str(row_id) + '.ico')
                    row_list.append((row.icon_url, final_favicon_path))
                    row.save()
            # Favicons are fetched asynchronously after all rows are saved.
            for iurl, dest in row_list:
                if iurl and iurl.startswith('http'):
                    cls.vnt.get(iurl, out=dest)
            if (settings_row and (settings_row.auto_archive
                                  or settings_row.auto_summary
                                  or settings_row.autotag)):
                for row in qlist:
                    if row.url:
                        dbxs.process_add_url(usr, row.url, row.directory,
                                             archive_html=False, row=row,
                                             settings_row=settings_row,
                                             media_path=row.media_path)

    @staticmethod
    def get_media_path(url, row_id):
        """Build (and create directories for) the archive path of a URL.

        Layout: ARCHIVE_LOCATION/<EXT-UPPER>/<row_id>/<row_id><ext>, with the
        extension guessed from the URL's content type ('.htm' fallback).
        """
        content_type = guess_type(url)[0]
        if content_type and content_type == 'text/plain':
            ext = '.txt'
        elif content_type:
            ext = guess_extension(content_type)
        else:
            ext = '.htm'
        out_dir = ext[1:].upper()
        out_title = str(row_id) + str(ext)
        media_dir = os.path.join(settings.ARCHIVE_LOCATION, out_dir)
        if not os.path.exists(media_dir):
            os.makedirs(media_dir)
        media_path_parent = os.path.join(media_dir, str(row_id))
        if not os.path.exists(media_path_parent):
            os.makedirs(media_path_parent)
        media_path = os.path.join(media_path_parent, out_title)
        return media_path

    @staticmethod
    def convert_bookmark_to_dict(import_file, mode='file'):
        """Parse a Netscape bookmark export into {directory: [link-tuples]}.

        Each link tuple is (url, icon_url, add_date, title, description).
        mode='file' reads import_file from disk; anything else treats it as
        the raw content string. Duplicate directory names get a numeric
        suffix.
        """
        links_dict = {}
        if mode == 'file':
            content = ""
            with open(import_file, 'r', encoding='utf-8') as fd:
                content = fd.read()
        else:
            content = import_file
        if content:
            # Drop inline base64 icons, then flatten to one line so the
            # regex scans work across the whole document.
            content = re.sub('ICON="(.*?)"', "", content)
            ncontent = re.sub('\n', " ", content)
            links_group = re.findall('<DT><H3(.*?)/DL>', ncontent)
            nsr = 0
            nlinks = []
            for i, j in enumerate(links_group):
                # Trailing <DT> sentinel lets the link regex match the last entry.
                j = j + '<DT>'
                nlinks.clear()
                dirfield = re.search('>(?P<dir>.*?)</H3>', j)
                if dirfield:
                    dirname = html.unescape(dirfield.group('dir'))
                else:
                    dirname = 'Unknown'
                links = re.findall('A HREF="(?P<url>.*?)"(?P<extra>.*?)<DT>', j)
                for url, extra in links:
                    dt = re.search('ADD_DATE="(?P<add_date>.*?)"', extra)
                    # NOTE(review): assumes ADD_DATE is always present —
                    # dt.group would raise AttributeError otherwise.
                    add_date = dt.group('add_date')
                    dt = re.search('ICON_URI="(?P<icon>.*?)"', extra)
                    if dt:
                        icon_u = dt.group('icon')
                    else:
                        icon_u = ''
                    dt = re.search('>(?P<title>.*?)</A>', extra)
                    if dt:
                        title = html.unescape(dt.group('title'))
                    else:
                        title = 'No Title'
                    dt = re.search('<DD>(?P<descr>.*?)(<DT>)?', extra)
                    if dt:
                        descr = html.unescape(dt.group('descr'))
                    else:
                        descr = 'Not Available'
                    logger.debug(url)
                    nlinks.append((url, icon_u, add_date, title, descr))
                if dirname in links_dict:
                    dirname = '{}-{}'.format(dirname, nsr)
                    nsr += 1
                links_dict.update({dirname: nlinks.copy()})
        return links_dict
class DBAccess:
    """Central data-access helper for the bookmark library.

    Bundles URL ingestion (async fetch + archive), media/favicon download,
    PDF/PNG/media conversion dispatch (celery or local task backend), and
    bookmark/tag CRUD over the ``Library``/``Tags``/``URLTags`` models.
    All methods are classmethods/staticmethods; no instances are created.
    """

    # Non-blocking HTTP clients shared by the whole class.
    # ``vnt`` handles ad-hoc fetches, ``vntbook`` is used when re-archiving an
    # existing row (see process_add_url); both are configured identically.
    vnt = Vinanti(block=False, hdrs={'User-Agent': settings.USER_AGENT},
                  max_requests=settings.VINANTI_MAX_REQUESTS,
                  backend=settings.VINANTI_BACKEND, timeout=300)
    vntbook = Vinanti(block=False, hdrs={'User-Agent': settings.USER_AGENT},
                      max_requests=settings.VINANTI_MAX_REQUESTS,
                      backend=settings.VINANTI_BACKEND, timeout=300)
    # Task runner for local (non-celery) subprocess jobs (wkhtmltopdf etc.).
    vnt_task = Vinanti(block=False, group_task=False, backend='function',
                       multiprocess=settings.MULTIPROCESS_VINANTI,
                       max_requests=settings.MULTIPROCESS_VINANTI_MAX_REQUESTS)

    @classmethod
    def add_new_url(cls, usr, request, directory, row):
        """Add the URL posted in ``add_url`` to ``directory`` for user ``usr``.

        A ``md:`` prefix on the URL requests HTML archiving plus media-element
        treatment.  ``row`` is a queryset/sequence whose first element (if any)
        is the user's settings row.  Duplicate URLs in the same directory are
        ignored.
        """
        url_name = request.POST.get('add_url', '')
        if url_name:
            if url_name.startswith('md:'):
                # 'md:' marker: strip it and archive the page as media.
                url_name = url_name[3:].strip()
                archive_html = True
                media_element = True
            else:
                archive_html = False
                media_element = False
            if row:
                settings_row = row[0]
            else:
                settings_row = None
            url_list = Library.objects.filter(usr=usr, directory=directory,
                                              url=url_name)
            # Only schedule a fetch when this URL is not already present.
            if not url_list and url_name:
                cls.process_add_url(usr, url_name, directory, archive_html,
                                    settings_row=settings_row,
                                    media_element=media_element)

    @classmethod
    def process_add_url(cls, usr, url_name, directory, archive_html,
                        row=None, settings_row=None, media_path=None,
                        media_element=False):
        """Fetch ``url_name`` asynchronously; url_fetch_completed finishes the job.

        When ``row`` is given (re-archiving an existing bookmark) the fetch
        goes through ``vntbook``; new bookmarks use ``vnt``.
        """
        part = partial(cls.url_fetch_completed, usr, url_name, directory,
                       archive_html, row, settings_row, media_path,
                       media_element)
        if row:
            cls.vntbook.get(url_name, onfinished=part)
        else:
            cls.vnt.get(url_name, onfinished=part)

    @classmethod
    def url_fetch_completed(cls, usr, url_name, directory, archive_html,
                            row, settings_row, media_path, media_element,
                            *args):
        """Callback run when the page fetch completes.

        Extracts title/favicon/og:image from HTML responses, optionally
        summarizes and tags the page, creates or updates the ``Library`` row,
        stores the archived content on disk and kicks off PDF/PNG/media
        conversion.  Returns the row id.  The Vinanti response object is the
        last positional argument.
        """
        ext = None
        save = False        # binary / non-HTML content: re-download to disk
        save_text = False   # HTML content: write req.html to disk
        favicon_link = None
        final_og_link = None
        summary = 'none'
        req = args[-1]      # Vinanti passes the response object last
        tags_list = []
        save_summary = False
        if req and req.content_type:
            # Strip any '; charset=...' suffix before mapping to an extension.
            if ';' in req.content_type:
                content_type = req.content_type.split(';')[0].strip()
            else:
                content_type = req.content_type
            if content_type == 'text/plain':
                ext = '.txt'
            else:
                ext = guess_extension(content_type)
            print(content_type, '------', ext)
        if req and req.html and not req.binary:
            if 'text/html' in req.content_type:
                soup = BeautifulSoup(req.html, 'html.parser')
                if soup.title:
                    title = soup.title.text
                    if title.lower() == 'youtube':
                        # YouTube serves a generic <title>; dig the real one
                        # out of the inline 'document.title = ...' script.
                        try_srch = re.search('document.title[^;]*', req.html)
                        if try_srch:
                            title = try_srch.group().replace(
                                'document.title = ', '')
                else:
                    title = url_name.rsplit('/')[-1]
                ilink = soup.find('link', {'rel': 'icon'})
                slink = soup.find('link', {'rel': 'shortcut icon'})
                mlink = soup.find('meta', {'property': 'og:image'})
                if mlink:
                    final_og_link = mlink.get('content', '')
                if ilink:
                    favicon_link = cls.format_link(ilink.get('href'), url_name)
                elif slink:
                    favicon_link = cls.format_link(slink.get('href'), url_name)
                else:
                    # Fall back to any <link> whose href looks like an .ico.
                    for link in soup.find_all('link'):
                        rel = link.get('href')
                        if (rel and (rel.endswith('.ico') or '.ico' in rel)):
                            favicon_link = cls.format_link(rel, url_name)
                if not favicon_link:
                    # Last resort: conventional /favicon.ico at the site root.
                    urlp = urlparse(url_name)
                    favicon_link = urlp.scheme + '://' + urlp.netloc + '/favicon.ico'
                if archive_html or (settings_row and settings_row.auto_archive):
                    save_text = True
                if settings_row and (settings_row.autotag or settings_row.auto_summary):
                    summary, tags_list = Summarizer.get_summary_and_tags(
                        req.html, settings_row.total_tags)
            else:
                # Non-HTML textual content: keep file name as title, save raw.
                title = url_name.rsplit('/')[-1]
                save = True
        elif req and req.binary:
            title = url_name.rsplit('/')[-1]
            save = True
        else:
            # No usable response body at all.
            ext = '.bin'
            title = url_name.rsplit('/', 1)[-1]
        if row is None:
            row = Library.objects.create(usr=usr, directory=directory,
                                         url=url_name, title=title,
                                         summary=summary,
                                         timestamp=timezone.now(),
                                         media_element=media_element)
        else:
            logger.debug('row - exists')
        if not media_path:
            # First archive for this row: build ARCHIVE_LOCATION/<EXT>/<id>/<id><ext>
            if ext and ext.startswith('.'):
                out_dir = ext[1:].upper()
            else:
                out_dir = str(ext).upper()
            if not ext:
                print(req.content_type)
            out_title = str(row.id) + str(ext)
            media_dir = os.path.join(settings.ARCHIVE_LOCATION, out_dir)
            if not os.path.exists(media_dir):
                os.makedirs(media_dir)
            if not os.path.exists(settings.FAVICONS_STATIC):
                os.makedirs(settings.FAVICONS_STATIC)
            media_path_parent = os.path.join(media_dir, str(row.id))
            final_favicon_path = os.path.join(settings.FAVICONS_STATIC,
                                              str(row.id) + '.ico')
            final_og_image_path = os.path.join(settings.FAVICONS_STATIC,
                                               str(row.id) + '.png')
            media_path = os.path.join(media_path_parent, out_title)
            row.media_path = media_path
            row.save()
            if favicon_link:
                cls.vnt.get(favicon_link, out=final_favicon_path)
            if final_og_link:
                cls.vnt.get(final_og_link, out=final_og_image_path)
        elif media_path and row:
            # Re-archive of an existing row: reuse its media path.
            final_favicon_path = os.path.join(settings.FAVICONS_STATIC,
                                              str(row.id) + '.ico')
            final_og_image_path = os.path.join(settings.FAVICONS_STATIC,
                                               str(row.id) + '.png')
            media_path_parent, out_title = os.path.split(media_path)
            # NOTE(review): nesting of the save_summary branch reconstructed
            # from collapsed source — when tags were also generated the row
            # save is deferred to edit_tags below (save_summary flag); confirm.
            if settings_row and settings_row.auto_summary and summary:
                row.summary = summary
                if settings_row and not tags_list:
                    row.save()
                else:
                    save_summary = True
            if not os.path.exists(final_favicon_path) and favicon_link:
                cls.vnt.get(favicon_link, out=final_favicon_path)
            if not os.path.exists(final_og_image_path) and final_og_link:
                cls.vnt.get(final_og_link, out=final_og_image_path)
        if save or save_text:
            if not os.path.exists(media_path_parent):
                os.makedirs(media_path_parent)
            if save:
                # Binary/non-HTML: fetch again straight to disk.
                #req.save(req.request_object, media_path)
                cls.vnt.get(url_name, out=media_path)
            else:
                with open(media_path, 'w') as fd:
                    fd.write(req.html)
            # NOTE(review): placement inside the save/save_text branch
            # reconstructed from collapsed source — conversion appears to run
            # only when something was archived; confirm against upstream.
            if settings_row and ext in ['.htm', '.html']:
                cls.convert_html_pdf(media_path_parent, settings_row, row,
                                     url_name, media_path, media_element)
        if settings_row and tags_list:
            if save_summary:
                # old_row=row lets edit_tags persist the pending summary too.
                cls.edit_tags(usr, row.id, ','.join(tags_list), '',
                              old_row=row)
            else:
                cls.edit_tags(usr, row.id, ','.join(tags_list), '')
        return row.id

    @classmethod
    def save_in_binary_format(cls, usr, request, directory):
        """Store each uploaded file from ``request.FILES`` as a Library row.

        The file body is written under ARCHIVE_LOCATION and the row's URL is
        set to the internal '/<user>/<dir>/<id>/archive' path.  Returns the
        list of those internal URLs.
        """
        url_list = []
        for key, value in request.FILES.items():
            title = value.name
            content = value.read()
            ext = None
            content_type = guess_type(title)[0]
            if content_type and content_type == 'text/plain':
                ext = '.txt'
            elif content_type:
                ext = guess_extension(content_type)
            print(content_type, '------', ext)
            if not ext:
                ext = '.bin'
            out_dir = ext[1:].upper()
            row = Library.objects.create(usr=usr, directory=directory,
                                         title=title,
                                         timestamp=timezone.now())
            out_title = str(row.id) + str(ext)
            media_dir = os.path.join(settings.ARCHIVE_LOCATION, out_dir)
            if not os.path.exists(media_dir):
                os.makedirs(media_dir)
            media_path_parent = os.path.join(media_dir, str(row.id))
            if not os.path.exists(media_path_parent):
                os.makedirs(media_path_parent)
            media_path = os.path.join(media_path_parent, out_title)
            row.media_path = media_path
            url = '/{}/{}/{}/archive'.format(usr.username, directory, row.id)
            row.url = url
            row.save()
            with open(media_path, 'wb') as fd:
                fd.write(content)
            url_list.append(url)
        return url_list

    @classmethod
    def convert_html_pdf(cls, media_path_parent, settings_row, row,
                         url_name, media_path, media_element):
        """Dispatch PDF/PNG snapshots (wkhtmltopdf/wkhtmltoimage) and media
        download for an archived page, via celery or the local task runner
        depending on ``settings.USE_CELERY``."""
        if settings_row.save_pdf:
            pdf = os.path.join(media_path_parent, str(row.id) + '.pdf')
            cmd = [
                'wkhtmltopdf', '--custom-header', 'User-Agent',
                settings.USER_AGENT, '--javascript-delay', '500',
                '--load-error-handling', 'ignore', url_name, pdf
            ]
            if settings.USE_XVFB:
                # Headless X server for environments without a display.
                cmd = ['xvfb-run', '--auto-servernum'] + cmd
            if settings.USE_CELERY:
                cls.convert_to_pdf_png.delay(cmd)
            else:
                cls.vnt_task.function(cls.convert_to_pdf_png_task, cmd,
                                      onfinished=partial(
                                          cls.finished_processing, 'pdf'))
        if settings_row.save_png:
            png = os.path.join(media_path_parent, str(row.id) + '.png')
            cmd = [
                'wkhtmltoimage', '--quality', str(settings_row.png_quality),
                '--custom-header', 'User-Agent', settings.USER_AGENT,
                '--javascript-delay', '500', '--load-error-handling',
                'ignore', url_name, png
            ]
            if settings.USE_XVFB:
                cmd = ['xvfb-run', '--auto-servernum'] + cmd
            if settings.USE_CELERY:
                cls.convert_to_pdf_png.delay(cmd)
            else:
                cls.vnt_task.function(cls.convert_to_pdf_png_task, cmd,
                                      onfinished=partial(
                                          cls.finished_processing, 'image'))
        if media_element or row.media_element:
            # Hand the URL to the user-configured download manager; the
            # command template comes from settings_row.download_manager.
            out = os.path.join(media_path_parent, str(row.id) + '.mp4')
            cmd_str = settings_row.download_manager.format(iurl=url_name,
                                                           output=out)
            cmd = cmd_str.split()
            logger.debug(cmd)
            # Only run whitelisted download managers.
            if cmd and cmd[0] in settings.DOWNLOAD_MANAGERS_ALLOWED:
                if settings.USE_CELERY:
                    cls.convert_to_pdf_png.delay(cmd)
                else:
                    cls.vnt_task.function(cls.convert_to_pdf_png_task, cmd,
                                          onfinished=partial(
                                              cls.finished_processing,
                                              'media'))

    @classmethod
    def convert_html_pdf_with_chromium(cls, media_path_parent, settings_row,
                                       row, url_name, media_path, mode='pdf'):
        """Alternative snapshot path using headless chromium.

        ``mode='pdf'`` prints the page to PDF; ``mode='dom'`` dumps the
        rendered DOM to an .htm file.
        """
        if mode == 'pdf':
            pdf = os.path.join(media_path_parent, str(row.id) + '.pdf')
            cmd = [
                'chromium', '--headless', '--disable-gpu',
                '--print-to-pdf={}'.format(pdf), url_name
            ]
            if not settings.CHROMIUM_SANDBOX:
                cmd.insert(1, '--no-sandbox')
            if settings.USE_CELERY:
                cls.convert_to_pdf_png.delay(cmd)
            else:
                cls.vnt_task.function(cls.convert_to_pdf_png_task, cmd,
                                      onfinished=partial(
                                          cls.finished_processing, 'pdf'))
        elif mode == 'dom':
            htm = os.path.join(media_path_parent, str(row.id) + '.htm')
            cmd = [
                'chromium', '--headless', '--disable-gpu', '--dump-dom',
                url_name
            ]
            if not settings.CHROMIUM_SANDBOX:
                cmd.insert(1, '--no-sandbox')
            if settings.USE_CELERY:
                cls.getdom_chromium.delay(cmd, htm)
            else:
                cls.vnt_task.function(cls.getdom_task_chromium, cmd, htm,
                                      onfinished=partial(
                                          cls.finished_processing, 'html'))

    # Plain function (no self/cls): executed by the vnt_task function backend.
    def getdom_task_chromium(cmd, htm):
        """Run chromium --dump-dom and write its stdout to ``htm``."""
        if os.name == 'posix':
            output = subprocess.check_output(cmd)
        else:
            # shell=True needed on non-POSIX platforms here.
            output = subprocess.check_output(cmd, shell=True)
        with open(htm, 'wb') as fd:
            fd.write(output)
        return True

    @task(name="convert-to-pdf-png")
    def getdom_chromium(cmd, htm):
        """Celery variant of getdom_task_chromium."""
        if os.name == 'posix':
            output = subprocess.check_output(cmd)
        else:
            output = subprocess.check_output(cmd, shell=True)
        with open(htm, 'wb') as fd:
            fd.write(output)

    @classmethod
    def finished_processing(cls, val, *args):
        """Logging callback for vnt_task jobs (``val`` names the job kind)."""
        logger.info('{}-->>>>finished--->>>{}'.format(val, args))

    # Plain function (no self/cls): executed by the vnt_task function backend.
    def convert_to_pdf_png_task(cmd):
        """Run an external conversion command synchronously."""
        if os.name == 'posix':
            subprocess.call(cmd)
        else:
            subprocess.call(cmd, shell=True)
        return True

    @task(name="convert-to-pdf-png")
    def convert_to_pdf_png(cmd):
        """Celery variant of convert_to_pdf_png_task."""
        if os.name == 'posix':
            subprocess.call(cmd)
        else:
            subprocess.call(cmd, shell=True)

    @staticmethod
    def get_rows_by_directory(usr, directory=None, search=None,
                              search_mode='title'):
        """Return bookmark tuples for a directory or a search query.

        ``search_mode`` selects the searched field (title/url/tag/summary);
        'dir' mode treats ``search`` as the directory name.  Rows without a
        URL are skipped.  Each tuple: (title, url, id, timestamp, tags,
        directory, media_path, media_element).
        """
        usr_list = []
        if search and search_mode != 'dir':
            if search_mode == 'title':
                usr_list = Library.objects.filter(
                    usr=usr, title__icontains=search).order_by('-timestamp')
            elif search_mode == 'url':
                usr_list = Library.objects.filter(
                    usr=usr, url__icontains=search).order_by('-timestamp')
            elif search_mode == 'tag':
                usr_list = Library.objects.filter(
                    usr=usr, tags__icontains=search).order_by('-timestamp')
            elif search_mode == 'summary':
                usr_list = Library.objects.filter(
                    usr=usr, summary__icontains=search).order_by('-timestamp')
        else:
            if not directory and search and search_mode == 'dir':
                directory = search
            usr_list = Library.objects.filter(
                usr=usr, directory=directory).order_by('-timestamp')
        nusr_list = []
        for row in usr_list:
            if row.url:
                if not row.tags:
                    tags = []
                else:
                    tags = row.tags.split(',')
                nusr_list.append(
                    (row.title, row.url, row.id, row.timestamp, tags,
                     row.directory, row.media_path, row.media_element))
        return nusr_list

    @staticmethod
    def get_rows_by_tag(usr, tagname):
        """Return bookmark tuples carrying ``tagname`` for ``usr``.

        Deduplicates by URL via an intermediate dict.  Returns None when the
        tag does not exist.
        """
        tagobj = Tags.objects.filter(tag=tagname)
        directory = 'tag'
        usr_list = []
        if tagobj:
            usr_list = URLTags.objects.select_related('url_id').filter(
                usr_id=usr, tag_id=tagobj[0])
            udict = {}
            tag_list = [tagname]
            for i in usr_list:
                uid = i.url_id.url
                dirname = i.url_id.directory
                # Keyed by URL so duplicate links collapse to one entry.
                udict.update({
                    uid: [
                        i.url_id.title, uid, i.url_id.id,
                        i.url_id.timestamp, [tagname], dirname,
                        i.url_id.media_path, i.url_id.media_element
                    ]
                })
            usr_list = [tuple(value) for key, value in udict.items()]
            return usr_list
        else:
            return None

    @staticmethod
    def populate_usr_list(usr, usr_list, create_dict=False):
        """Expand raw bookmark tuples into template-ready rows.

        Builds per-bookmark action URLs (edit/remove/move/read/archive),
        shortens long netlocs, formats timestamps and resolves favicon paths.
        Returns a dict keyed by 1-based index when ``create_dict`` is True,
        otherwise a list of lists.
        """
        if create_dict:
            nlist = {}
        else:
            nlist = []
        index = 1
        username = usr.username
        for title, url, idd, timestamp, tag, directory, media_path, media_element in usr_list:
            # Cosmetic clean-up of the display title.
            title = re.sub('_|-', ' ', title)
            title = re.sub('/', ' / ', title)
            base_dir = '/{}/{}/{}'.format(usr, directory, idd)
            base_remove = base_dir + '/remove'
            base_et = base_dir + '/edit-bookmark'
            move_single = base_dir + '/move-bookmark'
            move_multiple = base_dir + '/move-bookmark-multiple'
            base_eu = base_dir + '/edit-url'
            read_url = base_dir + '/read'
            # Link to the local archive only if the archived file exists.
            if media_path and os.path.exists(media_path):
                archive_media = base_dir + '/archive'
            else:
                archive_media = url
            netloc = urlparse(url).netloc
            if len(netloc) > 20:
                netloc = netloc[:20] + '..'
            timestamp = timestamp.strftime("%d %b %Y")
            final_favicon_path = os.path.join(settings.FAVICONS_STATIC,
                                              str(idd) + '.ico')
            if os.path.exists(final_favicon_path):
                fav_path = settings.STATIC_URL + 'favicons/{}.ico'.format(idd)
            else:
                fav_path = ""
            if create_dict:
                nlist.update({
                    index: {
                        'title': title,
                        'netloc': netloc,
                        'url': url,
                        'edit-bookmark': base_et,
                        'remove-url': base_remove,
                        'timestamp': timestamp,
                        'tag': tag,
                        'move-bookmark': move_single,
                        'move-multi': move_multiple,
                        'usr': username,
                        'archive-media': archive_media,
                        'directory': directory,
                        'read-url': read_url,
                        'id': idd,
                        'fav-path': fav_path,
                        'media-element': media_element
                    }
                })
            else:
                nlist.append([
                    index, title, netloc, url, base_et, base_remove,
                    timestamp, tag, move_single, move_multiple,
                    archive_media, directory, read_url, idd, fav_path,
                    media_element
                ])
            index += 1
        return nlist

    @staticmethod
    def format_link(lnk, url):
        """Resolve a possibly-relative link ``lnk`` against page URL ``url``.

        Handles protocol-relative (//), root-relative (/), ./ and ../ forms;
        anything else not starting with 'http' is treated as site-root
        relative.  '#' and empty links are returned unchanged.
        """
        ourl = urlparse(url)
        ourld = ourl.scheme + '://' + ourl.netloc
        if lnk and lnk != '#':
            if lnk.startswith('//'):
                lnk = ourl.scheme + ':' + lnk
            elif lnk.startswith('/'):
                lnk = ourld + lnk
            elif lnk.startswith('./'):
                lnk = url.rsplit('/', 1)[0] + lnk[1:]
            elif lnk.startswith('../'):
                lnk = url.rsplit('/', 2)[0] + lnk[2:]
            elif not lnk.startswith('http'):
                lnk = ourld + '/' + lnk
        return lnk

    @staticmethod
    def remove_url_link(usr, url_id=None, row=None):
        """Delete a bookmark row and its on-disk archive/resources.

        Accepts either the row object or its id.  The archive directory is
        only removed when its name matches the row id (sanity check against
        deleting an unrelated directory).
        """
        if row:
            url_id = row.id
        elif url_id:
            qlist = Library.objects.filter(usr=usr, id=url_id)
            if qlist:
                row = qlist[0]
        if row:
            media_path = row.media_path
            if media_path and os.path.exists(media_path):
                base_dir_url, file_name = os.path.split(media_path)
                base_dir_id, dir_id = os.path.split(base_dir_url)
                resource_dir = os.path.join(settings.ARCHIVE_LOCATION,
                                            'resources', str(url_id))
                if dir_id.isnumeric():
                    ndir_id = int(dir_id)
                    # Guard: only delete when the directory is named after
                    # this row's id.
                    if ndir_id == url_id:
                        shutil.rmtree(base_dir_url)
                        logger.info('removing {}'.format(base_dir_url))
                        if os.path.exists(resource_dir):
                            shutil.rmtree(resource_dir)
                            logger.info('removing {}'.format(resource_dir))
            row.delete()

    @staticmethod
    def move_bookmarks(usr, request, url_id=None, single=True):
        """Move one bookmark (``single``) or a comma-separated batch of ids
        (POST ``move_links``) into POST ``move_to_dir``.  Returns a status
        message."""
        msg = 'Nothing Moved'
        if single and url_id:
            move_to_dir = request.POST.get('move_to_dir', '')
            print(url_id, request.POST)
            if move_to_dir:
                Library.objects.filter(usr=usr,
                                       id=url_id).update(directory=move_to_dir)
                msg = 'Moved to {}'.format(move_to_dir)
        elif not single:
            move_to_dir = request.POST.get('move_to_dir', '')
            move_links = request.POST.get('move_links', '')
            if move_links:
                move_links_list = [
                    i.strip() for i in move_links.split(',') if i.strip()
                ]
            else:
                move_links_list = []
            if move_to_dir and move_links_list:
                for link in move_links_list:
                    if link.isnumeric():
                        link_id = int(link)
                        Library.objects.filter(
                            usr=usr, id=link_id).update(directory=move_to_dir)
                msg = 'Moved {1} links to {0}'.format(move_to_dir,
                                                      len(move_links_list))
        return msg

    @classmethod
    def group_links_actions(cls, usr, request, dirname, mode=None):
        """Apply a bulk action to the link ids posted in ``link_ids``.

        ``mode``: 'archive' re-archives each link, 'tags' applies the posted
        ``link_tags`` to each, 'merge' moves all of ``dirname`` into
        ``merge_dir`` (deleting duplicates by URL first).
        """
        msg = 'Nothing'
        links = request.POST.get('link_ids', '')
        link_tags = request.POST.get('link_tags', '')
        merge_dir = request.POST.get('merge_dir', '')
        if links:
            links_list = [i.strip() for i in links.split(',') if i.strip()]
        else:
            links_list = []
        if link_tags:
            tags_list = [i.strip() for i in link_tags.split(',') if i.strip()]
        else:
            tags_list = []
        qlist = UserSettings.objects.filter(usrid=usr)
        if qlist:
            set_row = qlist[0]
        else:
            set_row = None
        for link in links_list:
            if link.isnumeric():
                link_id = int(link)
                qset = Library.objects.filter(usr=usr, id=link_id)
                if qset:
                    row = qset[0]
                    if mode == 'archive':
                        cls.process_add_url(usr, row.url, dirname,
                                            archive_html=True, row=row,
                                            settings_row=set_row)
                    elif mode == 'tags' and tags_list:
                        cls.edit_tags(usr, row.id, ','.join(tags_list), '')
        if merge_dir and merge_dir != dirname and mode == 'merge':
            qlist = Library.objects.filter(usr=usr, directory=dirname)
            qlistm = Library.objects.filter(usr=usr, directory=merge_dir)
            # URLs already present in the target directory.
            merge_list = set([row.url for row in qlistm if row.url])
            for row in qlist:
                # Drop empty-URL rows and rows that would duplicate a target URL.
                if not row.url or row.url in merge_list:
                    row.delete()
            Library.objects.filter(
                usr=usr, directory=dirname).update(directory=merge_dir)
        return msg

    @staticmethod
    def edit_bookmarks(usr, request, url_id):
        """Update title/url/media-element of a bookmark from POST data, then
        delegate tag changes to edit_tags.  Returns a status message."""
        title = request.POST.get('new_title', '')
        nurl = request.POST.get('new_url', '')
        tags = request.POST.get('new_tags', '')
        tags_old = request.POST.get('old_tags', '')
        media_link = request.POST.get('media_link', '')
        print(url_id, request.POST)
        msg = 'Edited'
        if media_link and media_link == 'true':
            media_element = True
        else:
            media_element = False
        if title and nurl:
            Library.objects.filter(usr=usr, id=url_id).update(
                title=title, url=nurl, media_element=media_element)
            msg = msg + ' Title and Link'
        elif title:
            Library.objects.filter(usr=usr, id=url_id).update(
                title=title, media_element=media_element)
            msg = msg + ' Title'
        elif nurl:
            Library.objects.filter(usr=usr, id=url_id).update(
                url=nurl, media_element=media_element)
            msg = msg + ' Link'
        else:
            Library.objects.filter(
                usr=usr, id=url_id).update(media_element=media_element)
        if tags or tags_old:
            msg = DBAccess.edit_tags(usr, url_id, tags, tags_old)
        return msg

    @staticmethod
    def edit_tags(usr, url_id, tags, tags_old, old_row=None):
        """Reconcile a bookmark's tags from comma-separated new/old tag strings.

        Creates missing ``Tags`` rows, stores the deduplicated tag string on
        the Library row (``old_row`` lets the caller pass an already-loaded
        row with pending changes), then adds/removes ``URLTags`` associations
        for the set differences between new and old tags.
        """
        tags_list = [i.lower().strip() for i in tags.split(',')]
        # Deduplicated string stored on the Library row itself.
        tags_list_library = ','.join(list(set(tags_list)))
        tags_list_old = [i.lower().strip() for i in tags_old.split(',')]
        tags_list = [i for i in tags_list if i]
        tags_list_old = [i for i in tags_list_old if i]
        all_tags = Tags.objects.filter(tag__in=tags_list)
        tags_new_add = set(tags_list) - set(tags_list_old)
        tags_old_delete = set(tags_list_old) - set(tags_list)
        insert_list = []
        for tag in tags_list:
            if not all_tags.filter(tag=tag).exists():
                insert_list.append(Tags(tag=tag))
            else:
                logger.info('Tag: {} exists'.format(tag))
        if insert_list:
            Tags.objects.bulk_create(insert_list)
        if old_row:
            lib_obj = old_row
        else:
            lib_list = Library.objects.filter(usr=usr, id=url_id)
            lib_obj = lib_list[0]
        lib_obj.tags = tags_list_library
        lib_obj.save()
        tagins_list = []
        for tag in tags_new_add:
            tag_obj = Tags.objects.filter(tag=tag)
            tagid = URLTags.objects.filter(usr_id=usr, url_id=lib_obj,
                                           tag_id=tag_obj[0])
            if not tagid:
                # list.append returns None; 'row' is unused (kept as-is).
                row = tagins_list.append(
                    URLTags(usr_id=usr, url_id=lib_obj, tag_id=tag_obj[0]))
        if tagins_list:
            URLTags.objects.bulk_create(tagins_list)
        for tag in tags_old_delete:
            tag_obj = Tags.objects.filter(tag=tag)
            tagid = URLTags.objects.filter(usr_id=usr, url_id=lib_obj,
                                           tag_id=tag_obj[0])
            if tagid:
                URLTags.objects.filter(usr_id=usr, url_id=lib_obj,
                                       tag_id=tag_obj[0]).delete()
        msg = ('Edited Tags: new-tags-addition={}::old-tags-delete={}'.format(
            tags_new_add, tags_old_delete))
        logger.info(msg)
        return msg
class CustomRead: readable_format = [ 'text/plain', 'text/html', 'text/htm', 'text/css', 'application/xhtml+xml', 'application/xml', 'application/json', ] mtype_list = [ 'text/htm', 'text/html', 'text/plain' ] vnt_noblock = Vinanti(block=False, hdrs={'User-Agent':settings.USER_AGENT}, backend=settings.VINANTI_BACKEND, max_requests=settings.VINANTI_MAX_REQUESTS) vnt = Vinanti(block=True, hdrs={'User-Agent':settings.USER_AGENT}) fav_path = settings.FAVICONS_STATIC VIDEO_ID_DICT = OrderedDict() CACHE_FILE = os.path.join(settings.TMP_LOCATION, 'cache') ANNOTATION_SCRIPT = """ var pageUri = function () { return { beforeAnnotationCreated: function (ann) { ann.uri = window.location.href; } }; }; var app = new annotator.App(); var loc = '/annotate' var csrftoken = getCookie('csrftoken'); app.include(annotator.ui.main, {element: document.body}); app.include(annotator.storage.http, {prefix: loc, headers: {"X-CSRFToken": csrftoken} }); app.include(pageUri); app.start().then(function () { app.annotations.load({uri: window.location.pathname}); }); function getCookie(name) { var cookieValue = null; if (document.cookie && document.cookie !== '') { var cookies = document.cookie.split(';'); for (var i = 0; i < cookies.length; i++) { var cookie = jQuery.trim(cookies[i]); // Does this cookie string begin with the name we want? 
if (cookie.substring(0, name.length + 1) === (name + '=')) { cookieValue = decodeURIComponent(cookie.substring(name.length + 1)); break; } } } return cookieValue; }; """ @classmethod def get_archived_file(cls, usr, url_id, mode='html', req=None, return_path=False): qset = Library.objects.filter(usr=usr, id=url_id) streaming_mode = False if not os.path.exists(settings.TMP_LOCATION): os.makedirs(settings.TMP_LOCATION) if qset: row = qset[0] media_path = row.media_path if mode in ['pdf', 'png', 'html'] and media_path: fln, ext = media_path.rsplit('.', 1) if mode == 'pdf': media_path = fln + '.pdf' elif mode == 'png': media_path = fln + '.png' elif mode == 'html': media_path = fln + '.htm' elif mode == 'archive' and media_path: mdir, _ = os.path.split(media_path) filelist = os.listdir(mdir) mlist = [] extset = set(['pdf', 'png', 'htm', 'html', 'json']) for fl in filelist: ext = fl.rsplit('.', 1) if ext and ext[-1] not in extset: mlist.append(os.path.join(mdir, fl)) for mfile in mlist: if os.path.isfile(mfile) and os.stat(mfile).st_size: media_path = mfile streaming_mode = True break if streaming_mode and req: qlist = UserSettings.objects.filter(usrid=usr) if qlist and not qlist[0].media_streaming: streaming_mode = False if media_path and os.path.exists(media_path): mtype = guess_type(media_path)[0] if not mtype: mtype = 'application/octet-stream' ext = media_path.rsplit('.')[-1] if ext: filename = row.title + '.' + ext if '.' 
in row.title: file_ext = row.title.rsplit('.', 1)[-1] if ext == file_ext: filename = row.title else: filename = row.title + '.bin' if mtype in ['text/html', 'text/htm']: data = cls.format_html(row, media_path) return HttpResponse(data) elif streaming_mode: if os.path.isfile(cls.CACHE_FILE): with open(cls.CACHE_FILE, 'rb') as fd: cls.VIDEO_ID_DICT = pickle.load(fd) uid = str(uuid.uuid4()) uid = uid.replace('-', '') while uid in cls.VIDEO_ID_DICT: logger.debug("no unique ID, Generating again") uid = str(uuid.uuid4()) uid = uid.replace('-', '') time.sleep(0.01) cls.VIDEO_ID_DICT.update({uid:[media_path, time.time()]}) cls.VIDEO_ID_DICT.move_to_end(uid, last=False) if len(cls.VIDEO_ID_DICT) > settings.VIDEO_PUBLIC_LIST: cls.VIDEO_ID_DICT.popitem() with open(cls.CACHE_FILE, 'wb') as fd: pickle.dump(cls.VIDEO_ID_DICT, fd) if return_path: title_slug = slugify(row.title, allow_unicode=True) if settings.ROOT_URL_LOCATION: root_loc = settings.ROOT_URL_LOCATION if root_loc.startswith('/'): root_loc = root_loc[1:] return '{}/{}/getarchivedvideo/{}-{}'.format(root_loc, usr.username, title_slug, uid) else: return '{}/getarchivedvideo/{}-{}'.format(usr.username, title_slug, uid) else: return cls.get_archived_video(req, usr.username, uid) else: response = FileResponse(open(media_path, 'rb')) mtype = 'video/webm' if mtype == 'video/x-matroska' else mtype response['mimetype'] = mtype response['content-type'] = mtype response['content-length'] = os.stat(media_path).st_size filename = filename.replace(' ', '.') logger.info('{} , {}'.format(filename, mtype)) if not cls.is_human_readable(mtype) and not streaming_mode: response['Content-Disposition'] = 'attachment; filename="{}"'.format(quote(filename)) return response else: back_path = req.path_info.rsplit('/', 1)[0] + '/read' return render(req, 'archive_not_found.html', {'path':back_path}) else: return HttpResponse(status=404) @classmethod def get_archived_video(cls, request, username, video_id): if video_id in cls.VIDEO_ID_DICT: 
media_path, ltime = cls.VIDEO_ID_DICT.get(video_id) logger.debug('{} {}'.format(media_path, ltime)) if time.time() - ltime <= settings.VIDEO_ID_EXPIRY_LIMIT*3600: if os.path.isfile(media_path): mtype = guess_type(media_path)[0] if not mtype: mtype = 'application/octet-stream' range_header = request.META.get('HTTP_RANGE', '').strip() range_match = settings.RANGE_REGEX.match(range_header) size = os.stat(media_path).st_size if range_match: first_byte, last_byte = range_match.groups() first_byte = int(first_byte) if first_byte else 0 last_byte = int(last_byte) if last_byte else size - 1 if last_byte >= size: last_byte = size - 1 length = last_byte - first_byte + 1 response = StreamingHttpResponse( RangeFileResponse(open(media_path, 'rb'), offset=first_byte, length=length), status=206, content_type=mtype ) response['Content-Length'] = str(length) response['Content-Range'] = 'bytes {}-{}/{}'.format(first_byte, last_byte, size) else: response = StreamingHttpResponse(FileResponse(open(media_path, 'rb'))) response['content-length'] = size mtype = 'video/webm' if mtype == 'video/x-matroska' else mtype response['content-type'] = mtype response['mimetype'] = mtype response['Accept-Ranges'] = 'bytes' return response return HttpResponse(status=404) @classmethod def generate_archive_media_playlist(cls, server, usr, directory): qset = Library.objects.filter(usr=usr, directory=directory) pls_txt = '#EXTM3U\n' extset = set(['pdf', 'png', 'htm', 'html']) if not os.path.exists(settings.TMP_LOCATION): os.makedirs(settings.TMP_LOCATION) if os.path.isfile(cls.CACHE_FILE): with open(cls.CACHE_FILE, 'rb') as fd: cls.VIDEO_ID_DICT = pickle.load(fd) for row in qset: streaming_mode = False media_path = row.media_path media_element = row.media_element title = row.title if media_path and media_element: mdir, _ = os.path.split(media_path) filelist = os.listdir(mdir) mlist = [] for fl in filelist: ext = fl.rsplit('.', 1) if ext and ext[-1] not in extset: mlist.append(os.path.join(mdir, fl)) for 
mfile in mlist: if os.path.isfile(mfile) and os.stat(mfile).st_size: media_path = mfile streaming_mode = True break if media_path and os.path.exists(media_path): mtype = guess_type(media_path)[0] if not mtype: mtype = 'application/octet-stream' if streaming_mode: uid = str(uuid.uuid4()) uid = uid.replace('-', '') while uid in cls.VIDEO_ID_DICT: logger.debug("no unique ID, Generating again") uid = str(uuid.uuid4()) uid = uid.replace('-', '') time.sleep(0.01) cls.VIDEO_ID_DICT.update({uid:[media_path, time.time()]}) cls.VIDEO_ID_DICT.move_to_end(uid, last=False) if len(cls.VIDEO_ID_DICT) > settings.VIDEO_PUBLIC_LIST: cls.VIDEO_ID_DICT.popitem() title_slug = slugify(title, allow_unicode=True) if settings.ROOT_URL_LOCATION: root_loc = settings.ROOT_URL_LOCATION if root_loc.startswith('/'): root_loc = root_loc[1:] return_path = '{}/{}/{}/getarchivedvideo/{}-{}'.format(server, root_loc, usr.username, title_slug, uid) else: return_path = '{}/{}/getarchivedvideo/{}-{}'.format(server, usr.username, title_slug, uid) pls_txt = pls_txt+'#EXTINF:0, {0}\n{1}\n'.format(title, return_path) with open(cls.CACHE_FILE, 'wb') as fd: pickle.dump(cls.VIDEO_ID_DICT, fd) uid = str(uuid.uuid4()) uid = uid.replace('-', '') plfile = os.path.join(settings.TMP_LOCATION, uid) if not os.path.isfile(plfile): with open(plfile, 'wb') as fd: pickle.dump(pls_txt, fd) pls_path = '{}/{}/getarchivedplaylist/{}/playlist/{}'.format(settings.ROOT_URL_LOCATION, usr.username, directory, uid) logger.debug(pls_path) return pls_path @classmethod def read_customized(cls, usr, url_id, mode='read', req=None): qlist = Library.objects.filter(usr=usr, id=url_id).select_related() data = b"<html>Not Available</html>" mtype = 'text/html' if qlist: row = qlist[0] media_path = row.media_path if mode in ['read-default', 'read-dark', 'read-light', 'read-gray']: if mode == 'read-dark': row.reader_mode = UserSettings.DARK elif mode == 'read-light': row.reader_mode = UserSettings.LIGHT elif mode == 'read-gray': row.reader_mode 
= UserSettings.GRAY else: row.reader_mode = UserSettings.WHITE row.save() if media_path and os.path.exists(media_path): mtype = guess_type(media_path)[0] if mtype in cls.mtype_list or media_path.endswith(".bin") or media_path.endswith(".note"): if media_path.endswith(".bin"): html = media_path.rsplit(".", 1)[0] + ".htm" if os.path.exists(html): media_path = html mtype = "text/html" data = cls.format_html(row, media_path, custom_html=True) if mtype == 'text/plain' or media_path.endswith(".bin") or media_path.endswith(".note"): mtype = 'text/html' elif row.url: data = cls.get_content(row, url_id, media_path) response = HttpResponse() response['mimetype'] = mtype response['content-type'] = mtype response.write(data) return response @classmethod def read_customized_note(cls, usr, url_id, mode='read-note', req=None): qlist = Library.objects.filter(usr=usr, id=url_id).select_related() data = b"<html>Not Available</html>" mtype = 'text/html' if qlist: row = qlist[0] media_path = row.media_path if media_path and os.path.exists(media_path): data = cls.format_note(row, media_path) mtype = 'text/html' response = HttpResponse() response['mimetype'] = mtype response['content-type'] = mtype response.write(data) return response @classmethod def save_customized_note(cls, usr, url_id, mode='read-note', req=None): text = req.POST.get('edited_note', '') print(text) qlist = Library.objects.filter(usr=usr, id=url_id).select_related() data = b"<html>Not Available</html>" mtype = 'text/html' if qlist: row = qlist[0] media_path = row.media_path if media_path and os.path.exists(media_path): with open(media_path, "w") as f: f.write(text) mtype = 'text/html' response = HttpResponse() response['mimetype'] = mtype response['content-type'] = mtype response.write(bytes("Saved", "utf-8")) return response @staticmethod def format_note(row, media_path): content = open(media_path, "r").read() if row: if '/' in row.directory: base_dir = '{}/{}/subdir/{}/{}'.format(settings.ROOT_URL_LOCATION, 
        # NOTE(review): this chunk begins mid-method — the enclosing classmethod's
        # header (and the opening `if row: if '/' in row.directory: base_dir = ...`)
        # lies above this view. The visible tail builds the Summernote-based note
        # editor page for `row`, mirroring the base_dir logic of custom_template below.
                                                row.usr.username, row.directory, row.id)
            else:
                base_dir = '{}/{}/{}/{}'.format(settings.ROOT_URL_LOCATION,
                                                row.usr.username, row.directory, row.id)
            read_url = base_dir + '/read'
            read_pdf = base_dir + '/read-pdf'
            read_png = base_dir + '/read-png'
            read_html = base_dir + '/read-html'
        else:
            # No row available: neutralize all reader links.
            read_url = read_pdf = read_png = read_html = '#'
        # Map the user's reader mode to bootstrap card classes.
        card_bg = ''
        card_tab = ''
        if row.reader_mode == UserSettings.DARK:
            card_bg = 'text-white bg-dark'
            card_tab = 'bg-dark border-dark text-white'
        elif row.reader_mode == UserSettings.LIGHT:
            card_bg = 'bg-light'
        elif row.reader_mode == UserSettings.GRAY:
            card_bg = 'text-white bg-secondary'
            card_tab = 'bg-secondary border-secondary text-white'
        # Summernote editor page; `{{`/`}}` are literal braces escaped for .format().
        # NOTE(review): bootstrap.min.css.map is linked as a stylesheet — looks
        # unintentional (a source map is not CSS); confirm before changing.
        template = """
        <html>
        <head>
        <meta charset="utf-8">
        <title>{title}</title>
        <link rel="stylesheet" href="/static/css/bootstrap.min.css">
        <link rel="stylesheet" href="/static/css/bootstrap.min.css.map">
        <script src="/static/js/jquery-3.3.1.min.js"></script>
        <script src="/static/js/popper.min.js"></script>
        <script src="/static/js/bootstrap.min.js"></script>
        <link rel="stylesheet" href="/static/css/summernote-bs4.css">
        <script src="/static/js/summernote-bs4.js"></script>
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <meta name="referrer" content="no-referrer">
        </head>
        <body>
        <div class="row px-4" id="summernote"></div>
        <div class="row px-4 py-4">
        <button id="save" class="btn btn-primary" onclick="save()" type="button">
        Save
        </button>
        </div>
        <script>
        $('#summernote').summernote({{placeholder: "Text..", tabsize: 10, height: 500}});
        $("#summernote").summernote("code", `{content}`);
        var save = function() {{
        var markup = $('#summernote').summernote('code');
        var formdata = new FormData;
        formdata.append('edited_note', markup);
        var csrftoken = getCookie('csrftoken');
        var client = new postRequestUpload();
        var api_link = window.location.href + '-save';
        client.post(api_link, formdata, csrftoken, function(response) {{
        console.log(response);
        }})
        }};
        function getCookie(name) {{
        var cookieValue = null;
        if (document.cookie && document.cookie !== '') {{
        var cookies = document.cookie.split(';');
        for (var i = 0; i < cookies.length; i++) {{
        var cookie = jQuery.trim(cookies[i]);
        // Does this cookie string begin with the name we want?
        if (cookie.substring(0, name.length + 1) === (name + '=')) {{
        cookieValue = decodeURIComponent(cookie.substring(name.length + 1));
        break;
        }}
        }}
        }}
        return cookieValue;
        }};
        var postRequestUpload = function() {{
        this.post = function(url, params, token, callbak) {{
        var http_req = new XMLHttpRequest();
        http_req.onreadystatechange = function() {{
        if (http_req.readyState == 4 && http_req.status == 200) {{callbak(http_req.responseText);}}
        }}
        http_req.open( "POST", url, true );
        http_req.setRequestHeader("X-CSRFToken", token);
        http_req.send(params);
        }}
        }};
        </script>
        </body>
        </html>
        """.format(title="Notes", content=content)
        return template

    @classmethod
    def get_content(cls, row, url_id, media_path):
        """Fetch row.url, cache its HTML at media_path (if content-type is in
        cls.mtype_list), and return the formatted reader page.

        Also kicks off a favicon fetch for the URL if one is not cached yet.
        Returns "" when the fetch fails or the content-type is not accepted.
        """
        data = ""
        # Blocking fetch via the class-level Vinanti client.
        req = cls.vnt.get(row.url)
        media_path_parent, _ = os.path.split(media_path)
        if not os.path.exists(media_path_parent):
            os.makedirs(media_path_parent)
        if req and req.content_type and req.html:
            # Strip charset parameters: 'text/html; charset=utf-8' -> 'text/html'
            mtype = req.content_type.split(';')[0].strip()
            if mtype in cls.mtype_list:
                content = req.html
                with open(media_path, 'w') as fd:
                    fd.write(content)
                data = cls.format_html(
                    row, media_path,
                    content=content,
                    custom_html=True
                )
            fav_nam = str(url_id) + '.ico'
            final_favicon_path = os.path.join(cls.fav_path, fav_nam)
            if not os.path.exists(final_favicon_path):
                cls.get_favicon_link(req.html, row.url, final_favicon_path)
        return data

    @classmethod
    def format_html(cls, row, media_path, content=None, custom_html=False):
        """Rewrite the archived page's links/assets and return it as bytes.

        - <img>/<link rel=css> assets are downloaded into a per-row resource
          dir (named by sha256 of the absolute link) and rewritten to point at
          the local resource URL.
        - Other relative links are absolutized against row.url.
        - custom_html=True: run readability (Document) and wrap the summary in
          the card template; otherwise inject the annotator scripts as-is.
        """
        media_dir, file_path = os.path.split(media_path)
        resource_dir = os.path.join(settings.ARCHIVE_LOCATION, 'resources', str(row.id))
        resource_link = '{}/{}/{}/{}/{}'.format(settings.ROOT_URL_LOCATION, row.usr.username,
                                                row.directory, str(row.id), 'resources')
        if not os.path.exists(resource_dir):
            os.makedirs(resource_dir)
        if not content:
            # No in-memory copy supplied: read the cached page from disk.
            content = ""
            with open(media_path, encoding='utf-8', mode='r') as fd:
                content = fd.read()
        soup = BeautifulSoup(content, 'lxml')
        # Drop all scripts from the archived copy.
        for script in soup.find_all('script'):
            script.decompose()
        url_path = row.url
        ourl = urlparse(url_path)
        ourld = ourl.scheme + '://' + ourl.netloc
        link_list = soup.find_all(['a', 'link', 'img'])
        for link in link_list:
            if link.name == 'img':
                lnk = link.get('src', '')
            else:
                lnk = link.get('href', '')
            if lnk and lnk != '#':
                if link.name == 'img' or (link.name == 'link' and '.css' in lnk):
                    # Asset: mirror it locally under a content-addressed name.
                    lnk = dbxs.format_link(lnk, url_path)
                    lnk_bytes = bytes(lnk, 'utf-8')
                    h = hashlib.sha256(lnk_bytes)
                    lnk_hash = h.hexdigest()
                    if link.name == 'img':
                        link['src'] = resource_link + '/' + lnk_hash
                        if custom_html:
                            link['class'] = 'img-thumbnail'
                    else:
                        lnk_hash = lnk_hash + '.css'
                        link['href'] = resource_link + '/' + lnk_hash
                    file_image = os.path.join(resource_dir, lnk_hash)
                    if not os.path.exists(file_image):
                        # Non-blocking download of the asset.
                        cls.vnt_noblock.get(lnk, out=file_image)
                        logger.info('getting file: {}, out: {}'.format(lnk, file_image))
                elif lnk.startswith('http'):
                    # Already absolute: leave untouched.
                    pass
                else:
                    # Relative anchor: absolutize against the page URL.
                    nlnk = dbxs.format_link(lnk, url_path)
                    if link.name == 'img':
                        link['src'] = nlnk
                        if custom_html:
                            link['class'] = 'img-thumbnail'
                    else:
                        link['href'] = nlnk
        if custom_html:
            ndata = soup.prettify()
            if soup.title:
                title = soup.title.text
            else:
                title = row.url.rsplit('/')[-1]
            # readability-lxml extraction; fall back to custom_soup when the
            # summary is empty or whitespace-only.
            data = Document(ndata)
            data_sum = data.summary()
            if data_sum:
                nsoup = BeautifulSoup(data_sum, 'lxml')
                if nsoup.text.strip():
                    data = cls.custom_template(title, nsoup.prettify(), row)
                else:
                    data = cls.custom_soup(ndata, title, row)
            else:
                data = cls.custom_soup(ndata, title, row)
        else:
            # Raw mode: re-insert jQuery + annotator so highlights work.
            new_tag = soup.new_tag("script", src="/static/js/jquery-3.3.1.min.js")
            soup.find("body").append(new_tag)
            new_tag = soup.new_tag("script", src="/static/js/annotator.min.js")
            soup.find("body").append(new_tag)
            new_tag = soup.new_tag("script")
            new_tag.append(cls.ANNOTATION_SCRIPT)
            soup.find("body").append(new_tag)
            data = soup.prettify()
        return bytes(data, 'utf-8')

    @classmethod
    def custom_template(cls, title, content, row):
        """Wrap extracted page `content` in the bootstrap card reader template.

        `row` provides the per-row reader links and reader mode; when row is
        falsy all tab links become '#'.
        NOTE(review): when row is None the later `row.reader_mode` access will
        raise AttributeError (custom_soup may pass row=None) — confirm and guard.
        """
        if row:
            if '/' in row.directory:
                base_dir = '{}/{}/subdir/{}/{}'.format(settings.ROOT_URL_LOCATION,
                                                       row.usr.username, row.directory, row.id)
            else:
                base_dir = '{}/{}/{}/{}'.format(settings.ROOT_URL_LOCATION,
                                                row.usr.username, row.directory, row.id)
            read_url = base_dir + '/read'
            read_pdf = base_dir + '/read-pdf'
            read_png = base_dir + '/read-png'
            read_html = base_dir + '/read-html'
        else:
            read_url = read_pdf = read_png = read_html = '#'
        # Reader-mode -> bootstrap card styling.
        card_bg = ''
        card_tab = ''
        if row.reader_mode == UserSettings.DARK:
            card_bg = 'text-white bg-dark'
            card_tab = 'bg-dark border-dark text-white'
        elif row.reader_mode == UserSettings.LIGHT:
            card_bg = 'bg-light'
        elif row.reader_mode == UserSettings.GRAY:
            card_bg = 'text-white bg-secondary'
            card_tab = 'bg-secondary border-secondary text-white'
        template = """
        <html>
        <head>
        <meta charset="utf-8">
        <title>{title}</title>
        <link rel="stylesheet" href="/static/css/bootstrap.min.css">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <meta name="referrer" content="no-referrer">
        </head>
        <body>
        <div class="container-fluid">
        <div class="row">
        <div class="col-sm {card_bg}">
        <div class='card text-left {card_bg} mb-3'>
        <div class='card-header'>
        <ul class="nav nav-tabs card-header-tabs">
        <li class="nav-item">
        <a class="nav-link {card_tab} active" href="{read_url}">HTML</a>
        </li>
        <li class="nav-item">
        <a class="nav-link" href="{read_html}">Original</a>
        </li>
        <li class="nav-item">
        <a class="nav-link" href="{read_pdf}">PDF</a>
        </li>
        <li class="nav-item">
        <a class="nav-link" href="{read_png}">PNG</a>
        </li>
        </ul>
        </div>
        <div class='card-body'>
        <h5 class="card-title">{title}</h5>
        {content}
        </div>
        </div>
        </div>
        </div>
        </div>
        <script src="/static/js/jquery-3.3.1.min.js"></script>
        <script src="/static/js/annotator.min.js"></script>
        <script>{annot_script}</script>
        </body>
        </html>
        """.format(title=title, content=content, read_url=read_url,
                   read_pdf=read_pdf, read_png=read_png, read_html=read_html,
                   card_bg=card_bg, card_tab=card_tab,
                   annot_script=cls.ANNOTATION_SCRIPT)
        return template

    @classmethod
    def custom_soup(cls, data, title, row=None):
        """Fallback extractor: rebuild readable HTML from all text nodes.

        Wraps each text node back in a minimal tag (anchors keep their href;
        p/span get card-text classes), drops structural containers, and feeds
        the result through custom_template.
        """
        soup = BeautifulSoup(data, 'lxml')
        text_result = soup.find_all(text=True)
        final_result = []
        for elm in text_result:
            ntag = ''
            ptag = elm.parent.name
            if ptag == 'a':
                href = elm.parent.get('href')
                ntag = '<a href="{}">{}</a>'.format(href, elm)
            elif ptag in ['body', 'html', '[document]', 'img']:
                # Structural nodes: skip their stray whitespace text.
                pass
            elif ptag == 'p':
                ntag = '<p class="card-text">{}</p>'.format(elm)
            elif ptag == 'span':
                ntag = '<span class="card-text">{}</span>'.format(elm)
            elif '\n' in elm:
                # Pure line-break text node; placeholder removed again below.
                ntag = '</br>';
            else:
                tag = elm.parent.name
                ntag = '<{tag}>{text}</{tag}>'.format(tag=tag, text=elm)
            if ntag:
                final_result.append(ntag)
        result = ''.join(final_result)
        # Strip the </br> placeholders inserted above.
        result = re.sub(r'(</br>)+', '', result)
        content = cls.custom_template(title, result, row)
        return content

    @classmethod
    def get_favicon_link(cls, data, url_name, final_favicon_path):
        """Find the page's favicon in `data` and download it to the cache path.

        Preference order: <link rel=icon>, <link rel="shortcut icon">, any
        link href containing '.ico', then the conventional /favicon.ico.
        Download is non-blocking; does nothing if the file already exists.
        """
        soup = BeautifulSoup(data, 'lxml')
        favicon_link = ''
        if not os.path.exists(final_favicon_path):
            links = soup.find_all('link')
            ilink = soup.find('link', {'rel':'icon'})
            slink = soup.find('link', {'rel':'shortcut icon'})
            if ilink:
                favicon_link = dbxs.format_link(ilink.get('href'), url_name)
            elif slink:
                favicon_link = dbxs.format_link(slink.get('href'), url_name)
            else:
                for i in links:
                    rel = i.get('href')
                    if (rel and (rel.endswith('.ico') or '.ico' in rel)):
                        favicon_link = dbxs.format_link(rel, url_name)
                if not favicon_link:
                    urlp = urlparse(url_name)
                    favicon_link = urlp.scheme + '://' + urlp.netloc + '/favicon.ico'
            if favicon_link:
                cls.vnt_noblock.get(favicon_link, out=final_favicon_path)

    @classmethod
    def is_human_readable(cls, mtype):
        """Return True when `mtype` is one of cls.readable_format."""
        human_readable = False
        if mtype in cls.readable_format:
            human_readable = True
        return human_readable
def __init__(self, hdrs):
    """Set up a non-blocking Vinanti client using *hdrs*, falling back to a
    generic Mozilla User-Agent when no headers are supplied."""
    self.hdrs = hdrs if hdrs else {'User-Agent': 'Mozilla/5.0'}
    self.vnt = Vinanti(block=False, hdrs=self.hdrs, timeout=10)
def setupUi(self, MainWindow):
    """Build the comic-reader main window: a two-tab widget (comic image /
    comic list), a control frame (prev/next, date picker, combo of comics),
    a detached zoom scroll-area, and the signal wiring + HTTP client state.

    NOTE(review): statement order matters here (widgets must exist before
    retranslateUi and the signal connections) — do not reorder.
    """
    global screen_height, screen_width
    # --- main window chrome ---
    MainWindow.setObjectName(_fromUtf8("MainWindow"))
    MainWindow.setEnabled(True)
    MainWindow.resize(800, 400)
    MainWindow.setMinimumSize(QtCore.QSize(0, 0))
    MainWindow.setMaximumSize(QtCore.QSize(900, 400))
    icon = QtGui.QIcon.fromTheme(_fromUtf8(""))
    MainWindow.setWindowIcon(icon)
    MainWindow.setLayoutDirection(QtCore.Qt.LeftToRight)
    self.centralwidget = QtWidgets.QWidget(MainWindow)
    self.centralwidget.setObjectName(_fromUtf8("centralwidget"))
    self.verticalLayout = QtWidgets.QVBoxLayout(self.centralwidget)
    self.verticalLayout.setObjectName(_fromUtf8("verticalLayout"))
    # --- tab 1: the comic strip image ---
    self.tabWidget = QtWidgets.QTabWidget(self.centralwidget)
    self.tabWidget.setObjectName(_fromUtf8("tabWidget"))
    self.tab = MyWidget(MainWindow)
    self.tab.setObjectName(_fromUtf8("tab"))
    self.horizontalLayout = QtWidgets.QHBoxLayout(self.tab)
    self.horizontalLayout.setObjectName(_fromUtf8("horizontalLayout"))
    self.label = QtWidgets.QLabel(self.tab)
    sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Expanding)
    sizePolicy.setHorizontalStretch(0)
    sizePolicy.setVerticalStretch(0)
    sizePolicy.setHeightForWidth(self.label.sizePolicy().hasHeightForWidth())
    self.label.setSizePolicy(sizePolicy)
    self.label.setMaximumSize(QtCore.QSize(900, 290))
    self.label.setText(_fromUtf8(""))
    self.label.setScaledContents(True)
    self.label.setObjectName(_fromUtf8("label"))
    self.horizontalLayout.addWidget(self.label)
    self.tabWidget.addTab(self.tab, _fromUtf8(""))
    # --- tab 2: the list of available comics ---
    self.tab_2 = QtWidgets.QWidget()
    self.tab_2.setObjectName(_fromUtf8("tab_2"))
    self.tabWidget.addTab(self.tab_2, _fromUtf8(""))
    self.verticalLayout.addWidget(self.tabWidget)
    # --- bottom control frame ---
    self.frame = QtWidgets.QFrame(self.centralwidget)
    self.frame.setMinimumSize(QtCore.QSize(782, 60))
    self.frame.setMaximumSize(QtCore.QSize(782, 16777215))
    self.frame.setContextMenuPolicy(QtCore.Qt.ActionsContextMenu)
    self.frame.setFrameShape(QtWidgets.QFrame.NoFrame)
    self.frame.setFrameShadow(QtWidgets.QFrame.Raised)
    self.frame.setLineWidth(0)
    self.frame.setObjectName(_fromUtf8("frame"))
    self.prev = QtWidgets.QPushButton(self.frame)
    self.prev.setGeometry(QtCore.QRect(340, 20, 41, 21))
    self.prev.setObjectName(_fromUtf8("prev"))
    self.next = QtWidgets.QPushButton(self.frame)
    self.next.setGeometry(QtCore.QRect(410, 20, 41, 20))
    self.next.setObjectName(_fromUtf8("next"))
    self.date = QtWidgets.QDateEdit(self.frame)
    self.date.setGeometry(QtCore.QRect(620, 20, 110, 26))
    self.date.setCalendarPopup(True)
    self.date.setObjectName(_fromUtf8("date"))
    self.go = QtWidgets.QPushButton(self.frame)
    self.go.setGeometry(QtCore.QRect(740, 20, 20, 20))
    self.go.setObjectName(_fromUtf8("go"))
    self.btn1 = QtWidgets.QComboBox(self.frame)
    self.btn1.setGeometry(QtCore.QRect(30, 15, 110, 31))
    self.btn1.setObjectName(_fromUtf8("btn1"))
    # Four placeholder entries; real names set in retranslateUi.
    self.btn1.addItem(_fromUtf8(""))
    self.btn1.addItem(_fromUtf8(""))
    self.btn1.addItem(_fromUtf8(""))
    self.btn1.addItem(_fromUtf8(""))
    self.btn2 = QtWidgets.QPushButton(self.frame)
    self.btn2.setGeometry(QtCore.QRect(160, 20, 61, 21))
    self.btn2.setObjectName(_fromUtf8("btn2"))
    self.btnM = QtWidgets.QPushButton(self.frame)
    self.btnM.setGeometry(QtCore.QRect(240, 20, 51, 21))
    self.btnM.setObjectName(_fromUtf8("btnM"))
    self.verticalLayout.addWidget(self.frame)
    MainWindow.setCentralWidget(self.centralwidget)
    self.menubar = QtWidgets.QMenuBar(MainWindow)
    self.menubar.setGeometry(QtCore.QRect(0, 0, 800, 24))
    self.menubar.setObjectName(_fromUtf8("menubar"))
    MainWindow.setMenuBar(self.menubar)
    self.statusbar = QtWidgets.QStatusBar(MainWindow)
    self.statusbar.setObjectName(_fromUtf8("statusbar"))
    MainWindow.setStatusBar(self.statusbar)
    # --- detached scroll-area used for the zoomed/original-size view ---
    self.scrollArea = QtGuiQWidgetScroll()
    self.scrollArea.setWidgetResizable(True)
    self.scrollArea.setMaximumSize(screen_width, screen_height-60)
    self.scrollArea.setObjectName(_fromUtf8("scrollArea"))
    self.scrollAreaWidgetContents = QtWidgets.QWidget()
    self.scrollAreaWidgetContents.setObjectName(_fromUtf8("scrollAreaWidgetContents"))
    self.vBox = QtWidgets.QVBoxLayout(self.scrollAreaWidgetContents)
    self.scrollArea.setWidget(self.scrollAreaWidgetContents)
    self.labelExp = QtWidgets.QLabel(self.scrollAreaWidgetContents)
    self.labelExp.setObjectName(_fromUtf8("labelExp"))
    self.labelExp.setScaledContents(True)
    self.vBox.addWidget(self.labelExp)
    self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.tab_2)
    self.horizontalLayout_2.setObjectName(_fromUtf8("horizontalLayout_2"))
    self.listComics = QtWidgets.QListWidget(self.tab_2)
    self.listComics.setObjectName(_fromUtf8("listComics"))
    self.horizontalLayout_2.addWidget(self.listComics)
    # --- titles/texts, then signal wiring ---
    self.retranslateUi(MainWindow)
    self.tabWidget.setCurrentIndex(0)
    self.btn1.currentIndexChanged['QString'].connect(self.comics)
    self.listComics.itemDoubleClicked['QListWidgetItem*'].connect(self.addComics)
    self.prev.clicked.connect(self.previous)
    self.next.clicked.connect(self.nxt)
    self.go.clicked.connect(self.goto_direct)
    self.btn2.clicked.connect(partial(self.zoom_image))
    self.btnM.clicked.connect(self.loadMoreComics)
    QtCore.QMetaObject.connectSlotsByName(MainWindow)
    # --- non-UI state: async HTTP client and current-comic bookkeeping ---
    self.hdrs = {'User-Agent':USER_AGENT}
    self.vnt = Vinanti(block=False, hdrs=self.hdrs)
    self.base_url = None
    self.name = None
    self.picn = None
    self.home_comics = None
    self.cur_date = None
def test_session_mix_aio(self):
    """Queue a mixed POST/GET/HEAD group session on the aiohttp backend,
    overriding data/headers/handlers per request, then run the group."""
    post_url = 'http://www.httpbin.org/post'
    default_payload = {'hello': 'world', 'world': 'hello'}
    vnt = Vinanti(block=self.block, onfinished=hello, hdrs=self.hdr,
                  method='POST', data=default_payload, group_task=True,
                  backend='aiohttp')
    vnt.post(post_url)
    # Same endpoint, varying POST bodies.
    for payload in ({'clrs': 'algo'}, {'ast': 'OS'}):
        vnt.add(post_url, data=payload)
    vnt.add(post_url, data={'tma': 'calc'}, hdrs={'user-agent': 'curl'})
    vnt.add('http://www.httpbin.org/get', method='GET',
            params={'hp': 'ca', 'ahu': 'tfcs'})
    # These two use a different completion callback.
    vnt.add('http://httpbin.org/get', method='HEAD', onfinished=namaste)
    vnt.add('http://httpbin.org/ip', method='GET', onfinished=namaste)
    vnt.start()
class Ui_MainWindow(object):
    """GoComics reader window: fetches daily strips asynchronously (Vinanti),
    caches them as JPEGs under a per-comic directory, and lets the user manage
    a personal comic list stored in config.txt.

    NOTE(review): relies on module-level globals `MainWindow`, `home_comics`,
    `screen_width`, `screen_height`, `logger`, `USER_AGENT` defined elsewhere
    in this file.
    """

    def setupUi(self, MainWindow):
        """Build all widgets, wire signals, and initialise HTTP/client state.

        Statement order matters (widgets must exist before retranslateUi and
        the signal connections) — do not reorder.
        """
        global screen_height, screen_width
        # --- main window chrome ---
        MainWindow.setObjectName(_fromUtf8("MainWindow"))
        MainWindow.setEnabled(True)
        MainWindow.resize(800, 400)
        MainWindow.setMinimumSize(QtCore.QSize(0, 0))
        MainWindow.setMaximumSize(QtCore.QSize(900, 400))
        icon = QtGui.QIcon.fromTheme(_fromUtf8(""))
        MainWindow.setWindowIcon(icon)
        MainWindow.setLayoutDirection(QtCore.Qt.LeftToRight)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName(_fromUtf8("centralwidget"))
        self.verticalLayout = QtWidgets.QVBoxLayout(self.centralwidget)
        self.verticalLayout.setObjectName(_fromUtf8("verticalLayout"))
        # --- tab 1: the comic strip image ---
        self.tabWidget = QtWidgets.QTabWidget(self.centralwidget)
        self.tabWidget.setObjectName(_fromUtf8("tabWidget"))
        self.tab = MyWidget(MainWindow)
        self.tab.setObjectName(_fromUtf8("tab"))
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.tab)
        self.horizontalLayout.setObjectName(_fromUtf8("horizontalLayout"))
        self.label = QtWidgets.QLabel(self.tab)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Expanding)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.label.sizePolicy().hasHeightForWidth())
        self.label.setSizePolicy(sizePolicy)
        self.label.setMaximumSize(QtCore.QSize(900, 290))
        self.label.setText(_fromUtf8(""))
        self.label.setScaledContents(True)
        self.label.setObjectName(_fromUtf8("label"))
        self.horizontalLayout.addWidget(self.label)
        self.tabWidget.addTab(self.tab, _fromUtf8(""))
        # --- tab 2: the list of available comics ---
        self.tab_2 = QtWidgets.QWidget()
        self.tab_2.setObjectName(_fromUtf8("tab_2"))
        self.tabWidget.addTab(self.tab_2, _fromUtf8(""))
        self.verticalLayout.addWidget(self.tabWidget)
        # --- bottom control frame ---
        self.frame = QtWidgets.QFrame(self.centralwidget)
        self.frame.setMinimumSize(QtCore.QSize(782, 60))
        self.frame.setMaximumSize(QtCore.QSize(782, 16777215))
        self.frame.setContextMenuPolicy(QtCore.Qt.ActionsContextMenu)
        self.frame.setFrameShape(QtWidgets.QFrame.NoFrame)
        self.frame.setFrameShadow(QtWidgets.QFrame.Raised)
        self.frame.setLineWidth(0)
        self.frame.setObjectName(_fromUtf8("frame"))
        self.prev = QtWidgets.QPushButton(self.frame)
        self.prev.setGeometry(QtCore.QRect(340, 20, 41, 21))
        self.prev.setObjectName(_fromUtf8("prev"))
        self.next = QtWidgets.QPushButton(self.frame)
        self.next.setGeometry(QtCore.QRect(410, 20, 41, 20))
        self.next.setObjectName(_fromUtf8("next"))
        self.date = QtWidgets.QDateEdit(self.frame)
        self.date.setGeometry(QtCore.QRect(620, 20, 110, 26))
        self.date.setCalendarPopup(True)
        self.date.setObjectName(_fromUtf8("date"))
        self.go = QtWidgets.QPushButton(self.frame)
        self.go.setGeometry(QtCore.QRect(740, 20, 20, 20))
        self.go.setObjectName(_fromUtf8("go"))
        self.btn1 = QtWidgets.QComboBox(self.frame)
        self.btn1.setGeometry(QtCore.QRect(30, 15, 110, 31))
        self.btn1.setObjectName(_fromUtf8("btn1"))
        # Four placeholder entries; real names set in retranslateUi.
        self.btn1.addItem(_fromUtf8(""))
        self.btn1.addItem(_fromUtf8(""))
        self.btn1.addItem(_fromUtf8(""))
        self.btn1.addItem(_fromUtf8(""))
        self.btn2 = QtWidgets.QPushButton(self.frame)
        self.btn2.setGeometry(QtCore.QRect(160, 20, 61, 21))
        self.btn2.setObjectName(_fromUtf8("btn2"))
        self.btnM = QtWidgets.QPushButton(self.frame)
        self.btnM.setGeometry(QtCore.QRect(240, 20, 51, 21))
        self.btnM.setObjectName(_fromUtf8("btnM"))
        self.verticalLayout.addWidget(self.frame)
        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 800, 24))
        self.menubar.setObjectName(_fromUtf8("menubar"))
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName(_fromUtf8("statusbar"))
        MainWindow.setStatusBar(self.statusbar)
        # --- detached scroll-area used for the zoomed/original-size view ---
        self.scrollArea = QtGuiQWidgetScroll()
        self.scrollArea.setWidgetResizable(True)
        self.scrollArea.setMaximumSize(screen_width, screen_height-60)
        self.scrollArea.setObjectName(_fromUtf8("scrollArea"))
        self.scrollAreaWidgetContents = QtWidgets.QWidget()
        self.scrollAreaWidgetContents.setObjectName(_fromUtf8("scrollAreaWidgetContents"))
        self.vBox = QtWidgets.QVBoxLayout(self.scrollAreaWidgetContents)
        self.scrollArea.setWidget(self.scrollAreaWidgetContents)
        self.labelExp = QtWidgets.QLabel(self.scrollAreaWidgetContents)
        self.labelExp.setObjectName(_fromUtf8("labelExp"))
        self.labelExp.setScaledContents(True)
        self.vBox.addWidget(self.labelExp)
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.tab_2)
        self.horizontalLayout_2.setObjectName(_fromUtf8("horizontalLayout_2"))
        self.listComics = QtWidgets.QListWidget(self.tab_2)
        self.listComics.setObjectName(_fromUtf8("listComics"))
        self.horizontalLayout_2.addWidget(self.listComics)
        # --- titles/texts, then signal wiring ---
        self.retranslateUi(MainWindow)
        self.tabWidget.setCurrentIndex(0)
        self.btn1.currentIndexChanged['QString'].connect(self.comics)
        self.listComics.itemDoubleClicked['QListWidgetItem*'].connect(self.addComics)
        self.prev.clicked.connect(self.previous)
        self.next.clicked.connect(self.nxt)
        self.go.clicked.connect(self.goto_direct)
        self.btn2.clicked.connect(partial(self.zoom_image))
        self.btnM.clicked.connect(self.loadMoreComics)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)
        # --- non-UI state: async HTTP client and current-comic bookkeeping ---
        self.hdrs = {'User-Agent':USER_AGENT}
        self.vnt = Vinanti(block=False, hdrs=self.hdrs)
        self.base_url = None
        self.name = None
        self.picn = None
        self.home_comics = None
        self.cur_date = None

    def retranslateUi(self, MainWindow):
        """Set all user-visible texts (window/tab titles, buttons, combo)."""
        MainWindow.setWindowTitle(_translate("MainWindow", "Read Comics", None))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab), _translate("MainWindow", "Tab 1", None))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_2), _translate("MainWindow", "Tab 2", None))
        self.btn1.setItemText(0, _translate("MainWindow", "Select", None))
        self.btn1.setItemText(1, _translate("MainWindow", "Calvin", None))
        self.btn1.setItemText(2, _translate("MainWindow", "Garfield", None))
        self.btn1.setItemText(3, _translate("MainWindow", "OneBigHappy", None))
        self.date.setDisplayFormat(_translate("MainWindow", "yyyy/MM/dd", None))
        self.next.setText(_translate("MainWindow", "N", None))
        self.prev.setText(_translate("MainWindow", "P", None))
        self.go.setText(_translate("MainWindow", "Go", None))
        self.btn2.setText(_translate("MainWindow", "Original", None))
        self.btnM.setText(_translate("MainWindow", "More", None))
        self.btn2.setToolTip(_translate("MainWindow", "<html><head/><body><p>Show Original Image Size</p></body></html>", None))

    def loadMoreComics(self):
        """Switch to the list tab and (once) fetch the GoComics A-to-Z index;
        already-subscribed names from config.txt are passed to more_comics."""
        self.tabWidget.setCurrentIndex(1)
        comics_list = os.path.join(home_comics, 'config.txt')
        with open(comics_list, 'r') as f:
            lines = f.readlines()
        lines = [i.strip() for i in lines if i.strip()]
        if self.listComics.count() == 0:
            MainWindow.setWindowTitle('Wait..')
            url = "http://www.gocomics.com/comics/a-to-z"
            self.vnt.get(url, onfinished=partial(self.more_comics, lines))

    def more_comics(self, lines, *args):
        """Vinanti callback: populate listComics from the A-to-Z index page.

        Subscribed comics (present in `lines`) are shown with a '#' prefix.
        The response object is always the last positional arg.
        """
        MainWindow.setWindowTitle('Select Comics')
        content = args[-1].html
        soup = BeautifulSoup(content, 'html.parser')
        links = soup.findAll('a')
        for i in links:
            j = i.get('href')
            if j:
                # Links whose last path component is numeric look like
                # /<comic>/<yyyy>/<mm>/<dd> strip pages; take the comic slug.
                last = j.rsplit('/')[-1]
                if last.isnumeric():
                    karr = j.split('/')
                    if len(karr) > 1:
                        k = karr[1]
                    else:
                        k = None
                    if k in lines:
                        self.listComics.addItem('#'+k)
                    elif k:
                        self.listComics.addItem(k)

    def fetch_comics(self, base_url, dt):
        """Show the strip for date `dt` ('yyyy/mm/dd'): from local cache when
        present, otherwise fetch the strip page asynchronously."""
        t = re.sub('/', '-', dt)
        picn = os.path.join(self.home_comics, '{}-{}.jpg'.format(self.name, t))
        logger.debug(picn)
        self.picn = picn
        if not os.path.isfile(picn):
            MainWindow.setWindowTitle('Wait..')
            url = base_url + dt
            self.vnt.get(url, onfinished=partial(self.process_page, dt, picn))
            logger.debug(url)
        else:
            # Cached: display immediately.
            img = QtGui.QPixmap(picn, "1")
            self.label.setPixmap(img)
            title = '{} {}'.format(self.name, dt)
            MainWindow.setWindowTitle(title)
            self.scrollArea.setWindowTitle(title)
            if not self.scrollArea.isHidden():
                self.zoom_image(picn)

    def process_page(self, *args):
        """Callback for a strip page: extract the data-image URL(s) and start
        downloading the image to the cache path (args = dt, picn, ..., resp)."""
        content = args[-1].html
        m = re.findall('data-image="http[^"]*', content)
        logger.debug(m)
        dt = args[0]
        picn = args[1]
        # Strip the data-image=" prefix, keeping just the URLs.
        for j, i in enumerate(m):
            m[j] = re.sub('data-image="', "", i)
        logger.debug(m)
        if len(m) > 0:
            # Prefer the second match, fall back to the first.
            try:
                url = m[1]
            except:
                url = m[0]
            self.vnt.get(url, onfinished=partial(self.set_picture, picn, dt), out=picn)
            logger.debug('processing page')
        else:
            MainWindow.setWindowTitle('Comic strip not available for this date')

    def set_picture(self, *args):
        """Callback for the image download: display it (and refresh the zoom
        window if visible), or report the strip as unavailable."""
        picn = args[0]
        dt = args[1]
        if os.path.isfile(picn):
            title = '{} {}'.format(self.name, dt)
            self.scrollArea.setWindowTitle(title)
            MainWindow.setWindowTitle(title)
            img = QtGui.QPixmap(picn, "1")
            self.label.setPixmap(img)
            if not self.scrollArea.isHidden():
                self.zoom_image(picn)
        else:
            MainWindow.setWindowTitle('Comic strip not available for this date')
        logger.debug('setting-picture')

    def addComics(self):
        """Toggle subscription of the selected list entry.

        Unprefixed entry -> append to config.txt, add to combo, re-list as
        '#name'. '#'-prefixed entry -> remove from config.txt, rebuild the
        combo from the built-ins plus the remaining subscriptions.
        """
        comics_list = os.path.join(home_comics, 'config.txt')
        r = self.listComics.currentRow()
        item = self.listComics.item(r)
        if item:
            txt = item.text()
            if not txt.startswith('#'):
                if os.stat(comics_list).st_size == 0:
                    # First subscription: write without a leading newline.
                    with open(comics_list, 'w') as f:
                        f.write(txt)
                    self.btn1.addItem(txt)
                    self.listComics.takeItem(r)
                    del item
                    self.listComics.insertItem(r, '#'+txt)
                    self.listComics.setCurrentRow(r)
                else:
                    lines = []
                    with open(comics_list, 'r') as f:
                        lines = f.readlines()
                    lines = [i.strip() for i in lines if i.strip()]
                    if txt not in lines:
                        with open(comics_list, 'a') as f:
                            f.write('\n'+txt)
                        self.btn1.addItem(txt)
                        self.listComics.takeItem(r)
                        del item
                        self.listComics.insertItem(r, '#'+txt)
                        self.listComics.setCurrentRow(r)
            else:
                # Unsubscribe: rewrite config.txt without this name.
                lines = []
                txt = txt.replace('#', '')
                with open(comics_list, 'r') as f:
                    lines = f.readlines()
                lines = [i.strip() for i in lines if i.strip()]
                new_lines = []
                for i, j in enumerate(lines):
                    if txt != j:
                        new_lines.append(j)
                with open(comics_list, 'w') as f:
                    for i, j in enumerate(new_lines):
                        if i == 0:
                            f.write(j)
                        else:
                            f.write('\n'+j)
                self.listComics.takeItem(r)
                del item
                self.listComics.insertItem(r, txt)
                self.listComics.setCurrentRow(r)
                # Rebuild the combo: built-ins plus remaining subscriptions.
                self.btn1.clear()
                original_list = ['Select', 'Calvin', 'Garfield', 'OneBigHappy']
                new_list = original_list + new_lines
                for i in new_list:
                    self.btn1.addItem(i)

    def comics(self):
        """Combo-change handler: pick the comic, ensure its cache dir exists,
        resolve its GoComics base URL, and load the latest strip."""
        self.name = str(self.btn1.currentText())
        if self.name != "Select" and self.name:
            self.tabWidget.setCurrentIndex(0)
            self.home_comics = os.path.join(home_comics, self.name)
            if not os.path.exists(self.home_comics):
                os.makedirs(self.home_comics)
            if self.name == "Calvin":
                self.base_url = "http://www.gocomics.com/calvinandhobbes/"
            elif self.name == "Garfield":
                self.base_url = "http://www.gocomics.com/garfield/"
            elif self.name == "OneBigHappy":
                self.base_url = "http://www.gocomics.com/onebighappy/"
            else:
                # Any other subscribed name maps directly to its slug.
                self.base_url = "http://www.gocomics.com/"+self.name+'/'
            self.goto_page()

    def zoom_image(self, picn=None):
        """Show `picn` (or the current strip) at original size in the detached
        scroll-area, sized to the image but capped to the screen."""
        global screen_width, screen_height
        logger.debug(picn)
        try:
            if not picn:
                picn = self.picn
            if os.path.isfile(picn):
                im = Image.open(picn)
                w, h = im.size
                img = QtGui.QPixmap(picn, "1")
                self.labelExp.setPixmap(img)
                QtWidgets.QApplication.processEvents()
                print (w, screen_width, h, screen_height)
                if w < screen_width:
                    wd = w+20
                else:
                    wd = screen_width
                if h < screen_height:
                    ht = h + 20
                else:
                    ht = screen_height - 60
                self.scrollArea.resize(wd, ht)
                self.scrollArea.show()
        except Exception as err:
            logger.error(err)

    def goto_direct(self):
        """Go-button handler: fetch the strip for the picked date."""
        today = datetime.date(self.date.date().year(), self.date.date().month(), self.date.date().day())
        # ISO 'yyyy-mm-dd' -> URL path 'yyyy/mm/dd'
        td = re.sub('-', '/', str(today))
        print (td)
        self.fetch_comics(self.base_url, td)

    def goto_page(self):
        """Fetch the comic's landing page to discover the latest strip date."""
        self.vnt.get(self.base_url, onfinished=self.process_go_page)

    def process_go_page(self, *args):
        """Callback for the landing page: parse the latest strip's date from
        the feature block (recording it as cur_date, the upper bound for
        `nxt`), falling back to the date-picker value, then fetch it."""
        content = args[-1].html
        base_url = args[-2]
        logger.debug('{} {}'.format(args[-2], args[-3]))
        try:
            soup = BeautifulSoup(content, 'html.parser')
            link = soup.find('div', {'class':'feature'})
            link1 = link.find('h1')
            link2 = link1.find('a')['href']
            l = link2.split('/')
            td = l[-3]+'/'+l[-2]+'/'+l[-1]
            logger.debug(td)
            self.cur_date = datetime.date(int(l[-3]), int(l[-2]), int(l[-1]))
        except Exception as err:
            # Parsing failed: fall back to the currently selected date.
            today = datetime.date(self.date.date().year(), self.date.date().month(), self.date.date().day())
            td = re.sub('-', '/', str(today))
        logger.debug(td)
        self.fetch_comics(base_url, td)
        self.tab.setFocus()
        logger.debug('process_go_page')

    def previous(self):
        """Step the date picker back one day and fetch that strip."""
        today = datetime.date(self.date.date().year(), self.date.date().month(), self.date.date().day())
        day = datetime.timedelta(days=1)
        yday = today - day
        self.date.setDate(yday)
        td = re.sub('-', '/', str(yday))
        logger.debug(td)
        self.fetch_comics(self.base_url, td)

    def nxt(self):
        """Step the date picker forward one day (never past the newest strip,
        cur_date) and fetch that strip."""
        today = datetime.date(self.date.date().year(), self.date.date().month(), self.date.date().day())
        day = datetime.timedelta(days=1)
        tm = today + day
        if tm <= self.cur_date:
            self.date.setDate(tm)
            td = re.sub('-', '/', str(tm))
            print (td)
            self.fetch_comics(self.base_url, td)