Code Example #1
 def __init__(self, ui):
     self.ui = ui
     verify = (os.name == 'posix')  # verify TLS certificates only on POSIX
     self.vnt = Vinanti(block=True, hdrs={'User-Agent': self.ui.user_agent}, verify=verify)
Code Example #2
 def test_delete(self):
     urls = ['http://httpbin.org/delete', 'http://httpbin.org/delete']
     vnt = Vinanti(block=False)
     vnt.delete(urls,
                onfinished=hello,
                hdrs=self.hdr,
                data={'garfield': 'peanuts'})
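These tests hand Vinanti an onfinished callback named hello, whose body the listing never shows. Code Example #27 below reads the response as args[-1], so a minimal sketch of such a callback might look like this (the signature and attribute names are inferred from this listing, not from Vinanti's documentation):

 def hello(*args):
     # Vinanti appends the completed response object as the last
     # positional argument (Code Example #27 reads it as args[-1]).
     req = args[-1]
     if req and req.html:
         print(req.content_type, len(req.html))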
Code Example #3
 def test_patch(self):
     url = 'http://httpbin.org/patch'
     vnt = Vinanti(block=True)
     vnt.patch(url,
               onfinished=hello,
               hdrs=self.hdr,
               data={'gotham city': 'rajnagar'})
Code Example #4
 def test_put(self):
     url = 'http://httpbin.org/put'
     vnt = Vinanti(block=True)
     vnt.put(url,
             onfinished=hello,
             hdrs=self.hdr,
             data={'calvin': 'hobbes'})
Code Example #5
 def test_delete(self):
     url = 'http://httpbin.org/delete'
     vnt = Vinanti(block=True)
     vnt.delete(url,
                onfinished=hello,
                hdrs=self.hdr,
                data={'garfield': 'peanuts'})
Code Example #6
 def test_patch(self):
     urls = ['http://httpbin.org/patch', 'http://httpbin.org/patch']
     vnt = Vinanti(block=False)
     vnt.patch(urls,
               onfinished=hello,
               hdrs=self.hdr,
               data={'gotham city': 'rajnagar'})
Code Example #7
File: tvdb.py Project: thatsparks/kawaii-player
 def __init__(self, base_url=None, lang='en', wait=None,
              episode_summary=False, search_and_grab=True,
              backend=None, hdrs=None):
     if not base_url:
         self.base_url = 'https://www.thetvdb.com'
     else:
         self.base_url = base_url
     self.language = lang
     if hdrs:
         self.hdrs = hdrs
     else:
         self.hdrs = {'User-Agent': 'Mozilla/5.0'}
     verify = (os.name == 'posix')  # verify TLS certificates only on POSIX
     if isinstance(wait, (int, float)):
         self.vnt = Vinanti(block=False, hdrs=self.hdrs, wait=wait, timeout=10, verify=verify)
     else:
         self.vnt = Vinanti(block=False, hdrs=self.hdrs, timeout=10, verify=verify)
     self.fanart_list = []
     self.poster_list = []
     self.banner_list = []
     self.final_dict = {}
     self.time = time.time()
     self.ep_summary = episode_summary
     self.search_and_grab = search_and_grab
     self.backend = Backend(hdrs)
     self.backend_search = backend
Code Example #8
 def test_put(self):
     urls = ['http://httpbin.org/put', 'http://httpbin.org/put']
     vnt = Vinanti(block=False)
     vnt.put(urls,
             onfinished=hello,
             hdrs=self.hdr,
             data={'calvin': 'hobbes'})
Code Example #9
 def test_post(self):
     urls = ['http://httpbin.org/post', 'http://httpbin.org/post']
     vnt = Vinanti(block=False)
     vnt.post(urls,
              onfinished=hello,
              hdrs=self.hdr,
              data=(('moe', 'curly'), ('moe', 'larry')))
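Note the data payload here: a tuple of key/value pairs instead of a dict, which allows the repeated key 'moe'; a dict would collapse the two entries into one.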
Code Example #10
 def test_post_more(self):
     urls = ['http://httpbin.org/post', 'http://httpbin.org/post']
     vnt = Vinanti(block=False)
     vnt.post(urls,
              onfinished=hello,
              hdrs=self.hdr,
              data={
                  'yotsubato': 'aria',
                  'mushishi': 'kino'
              })
Code Example #11
File: test_crawl.py Project: therumbler/vinanti
 def test_crawl_urllib(self):
     vnt = Vinanti(block=False,
                   backend='urllib',
                   max_requests=5,
                   hdrs=hdr,
                   session=True,
                   loop_forever=False,
                   wait=0.2)
     url = 'https://docs.python.org/3/reference/index.html'
     vnt.crawl(url, onfinished=partial(hello, vnt, 'test_crawl_urllib'))
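functools.partial pre-binds vnt and a test label here, so when Vinanti fires the callback those bound arguments arrive first, followed by whatever the library appends (the response object, going by the callback convention sketched after Code Example #2).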
Code Example #12
 def test_get_params(self):
     urls = ['http://httpbin.org/get', 'http://httpbin.org/get']
     vnt = Vinanti(block=False)
     vnt.get(urls,
             onfinished=hello,
             hdrs=self.hdr,
             params={
                 'billoo': 'diamond comics',
                 'dhruva': 'raj comics'
             })
Code Example #13
 def test_session(self):
     vnt = Vinanti(block=self.block,
                   method='GET',
                   onfinished=hello,
                   hdrs=self.hdr,
                   group_task=True)
     vnt.get('http://www.google.com', out='/tmp/1.html')
     vnt.add('http://www.wikipedia.org', out='/tmp/2.html')
     vnt.add('http://www.google.com', out='/tmp/3.html')
     vnt.start()
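With group_task=True, the initial get() and the following add() calls appear to queue requests rather than fire them immediately; start() then dispatches the whole group. Code Example #19 uses the same get/add/start sequence.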
Code Example #14
 def test_proxies(self, test_url=None):
     vnt = Vinanti(block=False, multiprocess=True, timeout=30)
     url = test_url if test_url else "http://osu.ppy.sh/legal/terms"
     for p in self.proxies:
         vnt.head(url,
                  hdrs={"User-Agent": self.get_useragent()},
                  proxies={
                      "http": "http://{}/".format(p),
                  },
                  wait=0.5,
                  onfinished=partial(self.__test_proxies_callback__, vnt,
                                     p))
Code Example #15
File: parsing.py Project: xenking/quicksong
    def urls_to_ids(self, urls: list):
        vnt = Vinanti(block=True,
                      hdrs={"User-Agent": "Mozilla/5.0"},
                      timeout=10)
        for url in urls:
            new_url = re.sub(r'/b/', '/beatmaps/', url)

            if new_url != url:
                vnt.head(new_url, onfinished=self.urls_to_ids_callback)
            else:
                self.song_ids.append(get_song_id(url))
        del vnt
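Note that this example combines block=True with an onfinished callback: results still flow through urls_to_ids_callback, and blocking appears to mean only that each HEAD request completes before the next loop iteration starts.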
Code Example #16
File: test_crawl.py Project: therumbler/vinanti
 def test_crawl_limit_urllib(self):
     vnt = Vinanti(block=False,
                   backend='urllib',
                   max_requests=5,
                   hdrs=hdr,
                   session=True,
                   loop_forever=False,
                   wait=0.2)
     url = 'https://docs.python.org/3/'
     vnt.crawl(url,
               depth_allowed=1,
               onfinished=partial(hello, vnt, 'test_crawl_limit_urllib'))
Code Example #17
File: aclh.py Project: kanishka-linux/aclh
    def __prepare_request__(self, hdrs_dict, auth_tuple, data_tuple,
                            files_data, proxies, args):
        verify = not args.no_verify
        logger.debug('verify={}; cookie-unsafe={}'.format(
            verify, args.cookie_unsafe))
        vnt = Vinanti(block=False,
                      backend=args.backend,
                      hdrs=hdrs_dict,
                      wait=args.wait,
                      max_requests=args.max_requests,
                      continue_out=args.resume_download,
                      verify=verify,
                      auth=auth_tuple,
                      data=data_tuple,
                      cookie_unsafe=args.cookie_unsafe,
                      charset=args.charset,
                      timeout=args.timeout,
                      proxies=proxies,
                      files=files_data,
                      session=args.accept_cookies)

        if args.input_files:
            self.__process_files_urls__(vnt, args)
        else:
            self.__final_request__(vnt, hdrs_dict, auth_tuple, data_tuple,
                                   files_data, proxies, args)
Code Example #18
 def request_cookie(self):
     cookies_hdrs = {
         'authorization': "Basic eGVua2luZzoxOTI4Mzc0NjUwYXNk",
         'content-type': "application/x-www-form-urlencoded",
         'charset': "UTF-8",
     }
     payload = {'username': self['username'], 'password': self['password']}
     vnt_cookies = Vinanti(block=True,
                           hdrs={"User-Agent": "Mozilla/5.0"},
                           multiprocess=True,
                           session=True,
                           timeout=60)
     vnt_cookies.post('https://osu.ppy.sh/session',
                      onfinished=self.set_cookie_callback,
                      hdrs=cookies_hdrs,
                      data=payload)
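Here session=True presumably keeps the cookie jar alive across the exchange so that set_cookie_callback can capture the session cookie returned by the login POST.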
Code Example #19
File: test_auth.py Project: therumbler/vinanti
 def test_auth_noblock(self):
     vnt = Vinanti(block=False, log=logval, group_task=True)
     vnt.get(url1,
             onfinished=hello,
             hdrs=hdr,
             auth=('user-basic', 'password-basic'))
     vnt.add(url2,
             onfinished=hello,
             hdrs=hdr,
             auth_digest=('user-digest', 'password-digest'))
     vnt.start()
Code Example #20
 def test_proxy_noblock(self):
     vnt = Vinanti(block=False, log=logval, group_task=True)
     vnt.get('http://www.httpbin.org/ip',
             onfinished=hello,
             hdrs=hdr,
             proxies=proxies)
     vnt.add('http://www.httpbin.org/post',
             method='POST',
             data={'moe': 'curly'},
             onfinished=hello,
             hdrs=hdr,
             proxies=proxies)
     vnt.start()
Code Example #21
File: parsing.py Project: xenking/quicksong
 def __init__(self,
              song_urls,
              config_path=None,
              download_path=None,
              songs_path=None,
              auto_start=None,
              multiprocess=None,
              use_proxy=None):
     self._config = Config(config_path)
     self._config.update({
         'download_path': download_path,
         'songs_path': songs_path,
         'use_proxy': use_proxy
     })
     self._header = {
         "User-Agent": "Mozilla/5.0",
         "Accept-Language": "en-US;q=0.9,en;q=0.8",
         "Accept": "text/html,application/xhtml+xml,application/xml",
         "Accept-Encoding": "gzip, deflate, br",
         "Cookie": self._config.get_cookie()
     }
     self._multiprocess = multiprocess if multiprocess else 6
     self._proxy = Proxy(
         proxy_numbers=100) if self._config['use_proxy'] else None
     self.download_path = Path(
         self._config['download_path']).resolve(strict=True)
     self.songs_path = Path(self._config['songs_path']).resolve(strict=True)
     vnt_args = {"wait": 3, "timeout": 30, "max_requests": 5, "log": False}
     if self._multiprocess:
         vnt_args.update({
             "multiprocess": True,
             "max_requests": self._multiprocess + 1
         })
     if self._proxy:
         vnt_args.update({"wait": 1, "timeout": 60})
     self.vnt = Vinanti(**vnt_args)
     self.existed_ids = frozenset(
         get_existing_ids([self.songs_path, self.download_path]))
     self.song_ids = []
     self.auto_start = auto_start
     self.urls_to_ids(song_urls)
Code Example #22
File: test_save.py Project: therumbler/vinanti
 def test_save_file_aio(self):
     vnt = Vinanti(block=False, backend='aiohttp')
     vnt.get('http://www.google.com',
             onfinished=hello,
             hdrs=self.hdr,
             out='/tmp/1_aio.html')
     vnt.get('http://www.wikipedia.org',
             onfinished=hello,
             hdrs=self.hdr,
             out='/tmp/2_aio.html')
     vnt.get('http://www.google.com',
             onfinished=hello,
             hdrs=self.hdr,
             out='/tmp/3_aio.html')
Code Example #23
File: test_save.py Project: therumbler/vinanti
 def test_save_file(self):
     vnt = Vinanti(block=False)
     vnt.get('http://www.google.com',
             onfinished=hello,
             hdrs=self.hdr,
             out='/tmp/1.html')
     vnt.get('http://www.wikipedia.org',
             onfinished=hello,
             hdrs=self.hdr,
             out='/tmp/2.html')
     vnt.get('http://www.google.com',
             onfinished=hello,
             hdrs=self.hdr,
             out='/tmp/3.html')
Code Example #24
File: test_sync.py Project: therumbler/vinanti
 def test_no_async(self):
     vnt = Vinanti(block=True, hdrs=hdr)
     req = vnt.get('http://www.google.com')
     print(req.info)
     req = vnt.post('http://httpbin.org/post', data={'hello': 'world'})
     print(req.html)
     req = vnt.get('http://www.wikipedia.org')
     print(req.info)
     req = vnt.get('http://httpbin.org/get', method='HEAD')
     print(req.info)
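With block=True each call returns the response synchronously, so req.info and req.html can be read inline. The same request in non-blocking mode delivers the response to a callback instead, as the other tests in this listing do; a minimal sketch, reusing the hdr headers and hello callback assumed throughout:

 vnt = Vinanti(block=False, hdrs=hdr)
 vnt.get('http://www.google.com', onfinished=hello)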
Code Example #25
class CustomRead:
    
    readable_format = [
        'text/plain', 'text/html', 'text/htm',
        'text/css', 'application/xhtml+xml',
        'application/xml', 'application/json',
    ]
    mtype_list = [
        'text/htm', 'text/html', 'text/plain'
    ]
    vnt_noblock = Vinanti(block=False, hdrs={'User-Agent': settings.USER_AGENT},
                          backend=settings.VINANTI_BACKEND,
                          max_requests=settings.VINANTI_MAX_REQUESTS)
    vnt = Vinanti(block=True, hdrs={'User-Agent': settings.USER_AGENT})
    fav_path = settings.FAVICONS_STATIC
    VIDEO_ID_DICT = OrderedDict()
    CACHE_FILE = os.path.join(settings.TMP_LOCATION, 'cache')
    
    @classmethod
    def get_archived_file(cls, usr, url_id, mode='html', req=None, return_path=False):
        qset = Library.objects.filter(usr=usr, id=url_id)
        streaming_mode = False
        if not os.path.exists(settings.TMP_LOCATION):
            os.makedirs(settings.TMP_LOCATION)
        if qset:
            row = qset[0]
            media_path = row.media_path
            if mode in ['pdf', 'png'] and media_path:
                fln, ext = media_path.rsplit('.', 1)
                if mode == 'pdf':
                    media_path = fln + '.pdf'
                elif mode == 'png':
                    media_path = fln + '.png'
            elif mode == 'archive' and media_path:
                mdir, _ = os.path.split(media_path)
                filelist = os.listdir(mdir)
                mlist = []
                extset = set(['pdf', 'png', 'htm', 'html'])
                for fl in filelist:
                    ext = fl.rsplit('.', 1)
                    if ext and ext[-1] not in extset:
                        mlist.append(os.path.join(mdir, fl))
                for mfile in mlist:
                    if os.path.isfile(mfile) and os.stat(mfile).st_size:
                        media_path = mfile
                        streaming_mode = True
                        break
                if streaming_mode and req:
                    qlist = UserSettings.objects.filter(usrid=usr)
                    if qlist and not qlist[0].media_streaming:
                        streaming_mode = False
                        
            if media_path and os.path.exists(media_path):
                mtype = guess_type(media_path)[0]
                if not mtype:
                    mtype = 'application/octet-stream'
                ext = media_path.rsplit('.')[-1]
                if ext:
                    filename = row.title + '.' + ext
                    if '.' in row.title:
                        file_ext = row.title.rsplit('.', 1)[-1]
                        if ext == file_ext:
                            filename = row.title
                else:
                    filename = row.title + '.bin'
                if mtype in ['text/html', 'text/htm']:
                    data = cls.format_html(row, media_path)
                    return HttpResponse(data)
                elif streaming_mode:
                    if os.path.isfile(cls.CACHE_FILE):
                        with open(cls.CACHE_FILE, 'rb') as fd:
                            cls.VIDEO_ID_DICT = pickle.load(fd)
                    uid = str(uuid.uuid4())
                    uid = uid.replace('-', '')
                    while uid in cls.VIDEO_ID_DICT:
                        logger.debug("no unique ID, Generating again")
                        uid = str(uuid.uuid4())
                        uid = uid.replace('-', '')
                        time.sleep(0.01)
                    cls.VIDEO_ID_DICT.update({uid:[media_path, time.time()]})
                    cls.VIDEO_ID_DICT.move_to_end(uid, last=False)
                    if len(cls.VIDEO_ID_DICT) > settings.VIDEO_PUBLIC_LIST:
                        cls.VIDEO_ID_DICT.popitem()
                    with open(cls.CACHE_FILE, 'wb') as fd:
                        pickle.dump(cls.VIDEO_ID_DICT, fd)
                    if return_path:
                        title_slug = slugify(row.title, allow_unicode=True)
                        return '{}/getarchivedvideo/{}-{}'.format(usr.username, title_slug, uid)
                    else:
                        return cls.get_archived_video(req, usr.username, uid)
                else:
                    response = FileResponse(open(media_path, 'rb'))
                    mtype = 'video/webm' if mtype == 'video/x-matroska' else mtype
                    response['mimetype'] = mtype
                    response['content-type'] = mtype
                    response['content-length'] = os.stat(media_path).st_size
                    filename = filename.replace(' ', '.')
                    logger.info('{} , {}'.format(filename, mtype))
                    if not cls.is_human_readable(mtype) and not streaming_mode:
                        response['Content-Disposition'] = 'attachment; filename="{}"'.format(filename)
                    return response
            else:
                return HttpResponse('<html>File has not been archived in this format</html>')
        else:
            return HttpResponse(status=404)
    
    @classmethod
    def get_archived_video(cls, request, username, video_id):
        if video_id in cls.VIDEO_ID_DICT:
            media_path, ltime = cls.VIDEO_ID_DICT.get(video_id)
            logger.debug('{} {}'.format(media_path, ltime))
            if time.time() - ltime <= settings.VIDEO_ID_EXPIRY_LIMIT*3600:
                if os.path.isfile(media_path):
                    mtype = guess_type(media_path)[0]
                    if not mtype:
                        mtype = 'application/octet-stream'
                    range_header = request.META.get('HTTP_RANGE', '').strip()
                    range_match = settings.RANGE_REGEX.match(range_header)
                    size = os.stat(media_path).st_size
                    if range_match:
                        first_byte, last_byte = range_match.groups()
                        first_byte = int(first_byte) if first_byte else 0
                        last_byte = int(last_byte) if last_byte else size - 1
                        if last_byte >= size:
                            last_byte = size - 1
                        length = last_byte - first_byte + 1
                        response = StreamingHttpResponse(
                            RangeFileResponse(open(media_path, 'rb'), offset=first_byte,
                            length=length), status=206, content_type=mtype
                        )
                        response['Content-Length'] = str(length)
                        response['Content-Range'] = 'bytes {}-{}/{}'.format(first_byte, last_byte, size)
                    else:
                        response = StreamingHttpResponse(FileResponse(open(media_path, 'rb')))
                        response['content-length'] = size
                    mtype = 'video/webm' if mtype == 'video/x-matroska' else mtype
                    response['content-type'] = mtype
                    response['mimetype'] = mtype
                    response['Accept-Ranges'] = 'bytes'
                    return response
        return HttpResponse(status=404)
    
    @classmethod
    def generate_archive_media_playlist(cls, server, usr, directory):
        qset = Library.objects.filter(usr=usr, directory=directory)
        pls_txt = '#EXTM3U\n'
        extset = set(['pdf', 'png', 'htm', 'html'])
        if not os.path.exists(settings.TMP_LOCATION):
            os.makedirs(settings.TMP_LOCATION)
        if os.path.isfile(cls.CACHE_FILE):
            with open(cls.CACHE_FILE, 'rb') as fd:
                cls.VIDEO_ID_DICT = pickle.load(fd)
        for row in qset:
            streaming_mode = False
            media_path = row.media_path
            media_element = row.media_element
            title = row.title
            if media_path and media_element:
                mdir, _ = os.path.split(media_path)
                filelist = os.listdir(mdir)
                mlist = []
                for fl in filelist:
                    ext = fl.rsplit('.', 1)
                    if ext and ext[-1] not in extset:
                        mlist.append(os.path.join(mdir, fl))
                for mfile in mlist:
                    if os.path.isfile(mfile) and os.stat(mfile).st_size:
                        media_path = mfile
                        streaming_mode = True
                        break
            if media_path and os.path.exists(media_path):
                mtype = guess_type(media_path)[0]
                if not mtype:
                    mtype = 'application/octet-stream'
                if streaming_mode:
                    uid = str(uuid.uuid4())
                    uid = uid.replace('-', '')
                    while uid in cls.VIDEO_ID_DICT:
                        logger.debug("no unique ID, Generating again")
                        uid = str(uuid.uuid4())
                        uid = uid.replace('-', '')
                        time.sleep(0.01)
                    cls.VIDEO_ID_DICT.update({uid:[media_path, time.time()]})
                    cls.VIDEO_ID_DICT.move_to_end(uid, last=False)
                    if len(cls.VIDEO_ID_DICT) > settings.VIDEO_PUBLIC_LIST:
                        cls.VIDEO_ID_DICT.popitem()
                    title_slug = slugify(title, allow_unicode=True)
                    return_path = '{}/{}/getarchivedvideo/{}-{}'.format(server, usr.username, title_slug, uid)
                    pls_txt = pls_txt+'#EXTINF:0, {0}\n{1}\n'.format(title, return_path)
        with open(cls.CACHE_FILE, 'wb') as fd:
            pickle.dump(cls.VIDEO_ID_DICT, fd)
        uid = str(uuid.uuid4())
        uid = uid.replace('-', '')
        plfile = os.path.join(settings.TMP_LOCATION, uid)
        if not os.path.isfile(plfile):
            with open(plfile, 'wb') as fd:
                pickle.dump(pls_txt, fd)
        pls_path = '/{}/getarchivedplaylist/{}/{}'.format(usr.username, directory, uid)
        logger.debug(pls_path)
        return pls_path
        
    @classmethod
    def read_customized(cls, usr, url_id):
        qlist = Library.objects.filter(usr=usr, id=url_id).select_related()
        data = b"<html>Not Available</html>"
        mtype = 'text/html'
        if qlist:
            row = qlist[0]
            media_path = row.media_path
            if media_path and os.path.exists(media_path):
                mtype = guess_type(media_path)[0]
                
                if mtype in cls.mtype_list:
                    data = cls.format_html(row, media_path,
                                           custom_html=True)
                    if mtype == 'text/plain':
                        mtype = 'text/html'
            elif row.url:
                data = cls.get_content(row, url_id, media_path)
        response = HttpResponse()
        response['mimetype'] = mtype
        response['content-type'] = mtype
        response.write(data)
        return response
    
    @classmethod
    def get_content(cls, row, url_id, media_path):
        data = ""
        req = cls.vnt.get(row.url)
        media_path_parent, _ = os.path.split(media_path)
        if not os.path.exists(media_path_parent):
            os.makedirs(media_path_parent)
        if req and req.content_type and req.html:
            mtype = req.content_type.split(';')[0].strip()
            if mtype in cls.mtype_list:
                content = req.html
                with open(media_path, 'w', encoding='utf-8') as fd:
                    fd.write(content)
                data = cls.format_html(
                    row, media_path, content=content,
                    custom_html=True
                )
                fav_nam = str(url_id) + '.ico'
                final_favicon_path = os.path.join(cls.fav_path, fav_nam)
                if not os.path.exists(final_favicon_path):
                    cls.get_favicon_link(req.html, row.url,
                                         final_favicon_path)
        return data
                    
    @classmethod
    def format_html(cls, row, media_path, content=None, custom_html=False):
        media_dir, file_path = os.path.split(media_path)
        resource_dir = os.path.join(settings.ARCHIVE_LOCATION, 'resources', str(row.id))
        resource_link = '/{}/{}/{}/{}'.format(row.usr.username, row.directory, str(row.id), 'resources')
        if not os.path.exists(resource_dir):
            os.makedirs(resource_dir)
        if not content:
            content = ""
            with open(media_path, encoding='utf-8', mode='r') as fd:
                content = fd.read()
        soup = BeautifulSoup(content, 'lxml')
        for script in soup.find_all('script'):
            script.decompose()
        url_path = row.url
        ourl = urlparse(url_path)
        ourld = ourl.scheme + '://' + ourl.netloc
        link_list = soup.find_all(['a', 'link', 'img'])
        for link in link_list:
            if link.name == 'img':
                lnk = link.get('src', '')
            else:
                lnk = link.get('href', '')
            if lnk and lnk != '#':
                if link.name == 'img' or (link.name == 'link' and '.css' in lnk):
                    lnk = dbxs.format_link(lnk, url_path)
                    lnk_bytes = bytes(lnk, 'utf-8')
                    h = hashlib.sha256(lnk_bytes)
                    lnk_hash = h.hexdigest()
                    if link.name == 'img':
                        link['src'] = resource_link + '/' + lnk_hash
                        if custom_html:
                            link['class'] = 'card-img-top'
                    else:
                        lnk_hash = lnk_hash + '.css'
                        link['href'] = resource_link + '/' + lnk_hash
                    file_image = os.path.join(resource_dir, lnk_hash)
                    if not os.path.exists(file_image):
                        cls.vnt_noblock.get(lnk, out=file_image)
                        logger.info('getting file: {}, out: {}'.format(lnk, file_image))
                elif lnk.startswith('http'):
                    pass
                else:
                    nlnk = dbxs.format_link(lnk, url_path)
                    if link.name == 'img':
                        link['src'] = nlnk
                        if custom_html:
                            link['class'] = 'card-img-top'
                    else:
                        link['href'] = nlnk
        if custom_html:
            ndata = soup.prettify()
            if soup.title:
                title = soup.title.text
            else:
                title = row.url.rsplit('/')[-1]
            data = Document(ndata)
            data_sum = data.summary()
            if data_sum:
                nsoup = BeautifulSoup(data_sum, 'lxml')
                if nsoup.text.strip():
                    data = cls.custom_template(title, nsoup.prettify(), row)
                else:
                    data = cls.custom_soup(ndata, title, row)
            else:
                data = cls.custom_soup(ndata, title, row)
        else:
            data = soup.prettify()
        return bytes(data, 'utf-8')
        
    @staticmethod
    def custom_template(title, content, row):
        if row:
            base_dir = '/{}/{}/{}'.format(row.usr.username, row.directory, row.id)
            read_url = base_dir + '/read'
            read_pdf = base_dir + '/read-pdf'
            read_png = base_dir + '/read-png'
            read_html = base_dir + '/read-html'
        else:
            read_url = read_pdf = read_png = read_html = '#'
            
        template = """
        <html>
            <head>
                <meta charset="utf-8">
                <title>{title}</title>
                <link rel="stylesheet" href="/static/css/bootstrap.min.css">
                <meta name="viewport" content="width=device-width, initial-scale=1.0">
                <meta name="referrer" content="no-referrer">
            </head>
        <body>
            <div class="container-fluid">
                <div class="row">
                    <div class="col-sm"></div>
                    <div class="col-sm">
                        <div class='card text-left bg-light mb-3'>
                            <div class='card-header'>
                                <ul class="nav nav-tabs card-header-tabs">
                                    <li class="nav-item">
                                        <a class="nav-link active" href="{read_url}">HTML</a>
                                    </li>
                                    <li class="nav-item">
                                        <a class="nav-link" href="{read_html}">Original</a>
                                    </li>
                                    <li class="nav-item">
                                        <a class="nav-link" href="{read_pdf}">PDF</a>
                                    </li>
                                    <li class="nav-item">
                                        <a class="nav-link" href="{read_png}">PNG</a>
                                    </li>
                                </ul>
                            </div>
                            <div class='card-body'>
                                <h5 class="card-title">{title}</h5>
                                {content}
                            </div>
                        </div>
                    </div>
                    <div class="col-sm"></div>
                </div>
            </div>
        </body>
        </html>
        """.format(title=title, content=content,
                   read_url=read_url, read_pdf=read_pdf,
                   read_png=read_png, read_html=read_html)
        return template

    @classmethod
    def custom_soup(cls, data, title, row=None):
        soup = BeautifulSoup(data, 'lxml')
        text_result = soup.find_all(text=True)
        final_result = []
        for elm in text_result:
            ntag = ''
            ptag = elm.parent.name
            if ptag == 'a':
                href = elm.parent.get('href')
                ntag = '<a href="{}">{}</a>'.format(href, elm)
            elif ptag in ['body', 'html', '[document]', 'img']:
                pass
            elif ptag == 'p':
                ntag = '<p class="card-text">{}</p>'.format(elm)
            elif ptag == 'span':
                ntag = '<span class="card-text">{}</span>'.format(elm)
            elif '\n' in elm:
                ntag = '</br>'
            else:
                tag = elm.parent.name
                ntag = '<{tag}>{text}</{tag}>'.format(tag=tag, text=elm)
            if ntag:
                final_result.append(ntag)
        result = ''.join(final_result)
        result = re.sub(r'(</br>)+', '', result)
        content = cls.custom_template(title, result, row)
        return content
    
    @classmethod
    def get_favicon_link(cls, data, url_name, final_favicon_path):
        soup = BeautifulSoup(data, 'lxml')
        favicon_link = ''
        if not os.path.exists(final_favicon_path):
            links = soup.find_all('link')
            ilink = soup.find('link', {'rel':'icon'})
            slink = soup.find('link', {'rel':'shortcut icon'})
            if ilink:
                favicon_link = dbxs.format_link(ilink.get('href'), url_name)
            elif slink:
                favicon_link = dbxs.format_link(slink.get('href'), url_name)
            else:
                for i in links:
                    rel = i.get('href')
                    if (rel and (rel.endswith('.ico') or '.ico' in rel)):
                        favicon_link = dbxs.format_link(rel, url_name)
                if not favicon_link:
                    urlp = urlparse(url_name)
                    favicon_link = urlp.scheme + '://' + urlp.netloc + '/favicon.ico'
            if favicon_link:
                cls.vnt_noblock.get(favicon_link, out=final_favicon_path)
    
    @classmethod
    def is_human_readable(cls, mtype):
        human_readable = False
        if mtype in cls.readable_format:
            human_readable = True
        return human_readable
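A note on the streaming path above: get_archived_file hands out opaque, time-limited video IDs. Each UUID (regenerated until unique against the pickled cache) maps to a media path plus a timestamp, the OrderedDict is trimmed to settings.VIDEO_PUBLIC_LIST entries, and get_archived_video then serves the file with HTTP Range support so players can seek.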
Code Example #26
class ImportBookmarks:

    vnt = Vinanti(block=False,
                  hdrs={'User-Agent': settings.USER_AGENT},
                  max_requests=settings.VINANTI_MAX_REQUESTS,
                  backend=settings.VINANTI_BACKEND)

    vnt_task = Vinanti(block=False,
                       group_task=False,
                       backend='function',
                       multiprocess=settings.MULTIPROCESS_VINANTI,
                       max_requests=settings.MULTIPROCESS_VINANTI_MAX_REQUESTS)

    @classmethod
    def import_bookmarks(cls, usr, settings_row, import_file, mode='file'):
        book_dict = cls.convert_bookmark_to_dict(import_file, mode=mode)
        if not os.path.exists(settings.FAVICONS_STATIC):
            os.makedirs(settings.FAVICONS_STATIC)
        insert_links_list = []
        insert_dir_list = []
        url_list = []
        for dirname in book_dict:
            if '/' in dirname or ':' in dirname:
                dirname = re.sub(r'/|:', '-', dirname)
            if dirname:
                qdir = Library.objects.filter(usr=usr, directory=dirname)
                if not qdir:
                    dirlist = Library(usr=usr,
                                      directory=dirname,
                                      timestamp=timezone.now())
                    insert_dir_list.append(dirlist)
        if insert_dir_list:
            Library.objects.bulk_create(insert_dir_list)
        uqlist = Library.objects.filter(usr=usr).only('directory', 'url')
        urlset = set()
        if uqlist:
            urlset = {(i.directory, i.url) for i in uqlist if i.url}
        for dirname, links in book_dict.items():
            for val in links:
                url, icon_u, add_date, title, descr = val
                url_tuple = (dirname, url)
                if url_tuple not in urlset:
                    logger.info(val)
                    add_date = datetime.fromtimestamp(int(add_date))
                    lib = Library(usr=usr,
                                  directory=dirname,
                                  url=url,
                                  icon_url=icon_u,
                                  timestamp=add_date,
                                  title=title,
                                  summary=descr)
                    insert_links_list.append(lib)
                    url_list.append(url)
                else:
                    logger.info('{}-->{}; already exists'.format(dirname, url))
        cls.insert_in_bulk(usr, settings_row, insert_links_list, url_list)

    @classmethod
    def insert_in_bulk(cls, usr, settings_row, insert_links_list, url_list):
        if insert_links_list:
            Library.objects.bulk_create(insert_links_list)

        qlist = Library.objects.filter(usr=usr, url__in=url_list)
        row_list = []
        for row in qlist:
            icon_url = row.icon_url
            row_id = row.id
            url = row.url
            if url:
                row.media_path = cls.get_media_path(url, row_id)
            final_favicon_path = os.path.join(settings.FAVICONS_STATIC,
                                              str(row_id) + '.ico')
            row_list.append((icon_url, final_favicon_path))
            row.save()
        for iurl, dest in row_list:
            if iurl and iurl.startswith('http'):
                cls.vnt.get(iurl, out=dest)

        if (settings_row
                and (settings_row.auto_archive or settings_row.auto_summary
                     or settings_row.autotag)):
            for row in qlist:
                if row.url:
                    dbxs.process_add_url(usr,
                                         row.url,
                                         row.directory,
                                         archive_html=False,
                                         row=row,
                                         settings_row=settings_row,
                                         media_path=row.media_path)

    @staticmethod
    def get_media_path(url, row_id):
        content_type = guess_type(url)[0]
        if content_type and content_type == 'text/plain':
            ext = '.txt'
        elif content_type:
            ext = guess_extension(content_type)
        else:
            ext = '.htm'
        out_dir = ext[1:].upper()
        out_title = str(row_id) + str(ext)
        media_dir = os.path.join(settings.ARCHIVE_LOCATION, out_dir)
        if not os.path.exists(media_dir):
            os.makedirs(media_dir)

        media_path_parent = os.path.join(media_dir, str(row_id))
        if not os.path.exists(media_path_parent):
            os.makedirs(media_path_parent)

        media_path = os.path.join(media_path_parent, out_title)
        return media_path

    @staticmethod
    def convert_bookmark_to_dict(import_file, mode='file'):
        links_dict = {}
        if mode == 'file':
            content = ""
            with open(import_file, 'r', encoding='utf-8') as fd:
                content = fd.read()
        else:
            content = import_file
        if content:
            content = re.sub('ICON="(.*?)"', "", content)
            ncontent = re.sub('\n', " ", content)
            links_group = re.findall('<DT><H3(.*?)/DL>', ncontent)
            nsr = 0
            nlinks = []
            for j in links_group:
                j = j + '<DT>'
                nlinks.clear()
                dirfield = re.search('>(?P<dir>.*?)</H3>', j)
                if dirfield:
                    dirname = html.unescape(dirfield.group('dir'))
                else:
                    dirname = 'Unknown'
                links = re.findall('A HREF="(?P<url>.*?)"(?P<extra>.*?)<DT>',
                                   j)
                for url, extra in links:
                    dt = re.search('ADD_DATE="(?P<add_date>.*?)"', extra)
                    add_date = dt.group('add_date')
                    dt = re.search('ICON_URI="(?P<icon>.*?)"', extra)
                    if dt:
                        icon_u = dt.group('icon')
                    else:
                        icon_u = ''
                    dt = re.search('>(?P<title>.*?)</A>', extra)
                    if dt:
                        title = html.unescape(dt.group('title'))
                    else:
                        title = 'No Title'
                    dt = re.search('<DD>(?P<descr>.*)', extra)
                    if dt:
                        descr = html.unescape(dt.group('descr'))
                    else:
                        descr = 'Not Available'
                    logger.debug(url)
                    nlinks.append((url, icon_u, add_date, title, descr))
                if dirname in links_dict:
                    dirname = '{}-{}'.format(dirname, nsr)
                    nsr += 1
                links_dict.update({dirname: nlinks.copy()})
        return links_dict
Code Example #27
class DBAccess:

    vnt = Vinanti(block=False,
                  hdrs={'User-Agent': settings.USER_AGENT},
                  max_requests=settings.VINANTI_MAX_REQUESTS,
                  backend=settings.VINANTI_BACKEND,
                  timeout=300)
    vntbook = Vinanti(block=False,
                      hdrs={'User-Agent': settings.USER_AGENT},
                      max_requests=settings.VINANTI_MAX_REQUESTS,
                      backend=settings.VINANTI_BACKEND,
                      timeout=300)
    vnt_task = Vinanti(block=False,
                       group_task=False,
                       backend='function',
                       multiprocess=settings.MULTIPROCESS_VINANTI,
                       max_requests=settings.MULTIPROCESS_VINANTI_MAX_REQUESTS)

    @classmethod
    def add_new_url(cls, usr, request, directory, row):
        url_name = request.POST.get('add_url', '')
        if url_name:
            if url_name.startswith('md:'):
                url_name = url_name[3:].strip()
                archive_html = True
                media_element = True
            else:
                archive_html = False
                media_element = False
            if row:
                settings_row = row[0]
            else:
                settings_row = None
            url_list = Library.objects.filter(usr=usr,
                                              directory=directory,
                                              url=url_name)
            if not url_list and url_name:
                cls.process_add_url(usr,
                                    url_name,
                                    directory,
                                    archive_html,
                                    settings_row=settings_row,
                                    media_element=media_element)

    @classmethod
    def process_add_url(cls,
                        usr,
                        url_name,
                        directory,
                        archive_html,
                        row=None,
                        settings_row=None,
                        media_path=None,
                        media_element=False):
        part = partial(cls.url_fetch_completed, usr, url_name, directory,
                       archive_html, row, settings_row, media_path,
                       media_element)
        if row:
            cls.vntbook.get(url_name, onfinished=part)
        else:
            cls.vnt.get(url_name, onfinished=part)

    @classmethod
    def url_fetch_completed(cls, usr, url_name, directory, archive_html, row,
                            settings_row, media_path, media_element, *args):
        ext = None
        save = False
        save_text = False
        favicon_link = None
        final_og_link = None
        summary = 'none'
        req = args[-1]
        tags_list = []
        save_summary = False
        if req and req.content_type:
            if ';' in req.content_type:
                content_type = req.content_type.split(';')[0].strip()
            else:
                content_type = req.content_type
            if content_type == 'text/plain':
                ext = '.txt'
            else:
                ext = guess_extension(content_type)
            logger.debug('{} ------ {}'.format(content_type, ext))
        if req and req.html and not req.binary:
            if 'text/html' in req.content_type:
                soup = BeautifulSoup(req.html, 'html.parser')
                if soup.title:
                    title = soup.title.text
                    if title.lower() == 'youtube':
                        try_srch = re.search('document.title[^;]*', req.html)
                        if try_srch:
                            title = try_srch.group().replace(
                                'document.title = ', '')
                else:
                    title = url_name.rsplit('/')[-1]
                ilink = soup.find('link', {'rel': 'icon'})
                slink = soup.find('link', {'rel': 'shortcut icon'})
                mlink = soup.find('meta', {'property': 'og:image'})
                if mlink:
                    final_og_link = mlink.get('content', '')
                if ilink:
                    favicon_link = cls.format_link(ilink.get('href'), url_name)
                elif slink:
                    favicon_link = cls.format_link(slink.get('href'), url_name)
                else:
                    for link in soup.find_all('link'):
                        rel = link.get('href')
                        if (rel and (rel.endswith('.ico') or '.ico' in rel)):
                            favicon_link = cls.format_link(rel, url_name)
                    if not favicon_link:
                        urlp = urlparse(url_name)
                        favicon_link = urlp.scheme + '://' + urlp.netloc + '/favicon.ico'

                if archive_html or (settings_row
                                    and settings_row.auto_archive):
                    save_text = True
                if settings_row and (settings_row.autotag
                                     or settings_row.auto_summary):
                    summary, tags_list = Summarizer.get_summary_and_tags(
                        req.html, settings_row.total_tags)
            else:
                title = url_name.rsplit('/')[-1]
                save = True
        elif req and req.binary:
            title = url_name.rsplit('/')[-1]
            save = True
        else:
            ext = '.bin'
            title = url_name.rsplit('/', 1)[-1]
        if row is None:
            row = Library.objects.create(usr=usr,
                                         directory=directory,
                                         url=url_name,
                                         title=title,
                                         summary=summary,
                                         timestamp=timezone.now(),
                                         media_element=media_element)
        else:
            logger.debug('row - exists')
        if not media_path:
            if ext and ext.startswith('.'):
                out_dir = ext[1:].upper()
            else:
                out_dir = str(ext).upper()
            if not ext:
                logger.debug(req.content_type)
            out_title = str(row.id) + str(ext)
            media_dir = os.path.join(settings.ARCHIVE_LOCATION, out_dir)
            if not os.path.exists(media_dir):
                os.makedirs(media_dir)
            if not os.path.exists(settings.FAVICONS_STATIC):
                os.makedirs(settings.FAVICONS_STATIC)
            media_path_parent = os.path.join(media_dir, str(row.id))
            final_favicon_path = os.path.join(settings.FAVICONS_STATIC,
                                              str(row.id) + '.ico')
            final_og_image_path = os.path.join(settings.FAVICONS_STATIC,
                                               str(row.id) + '.png')
            media_path = os.path.join(media_path_parent, out_title)
            row.media_path = media_path
            row.save()
            if favicon_link:
                cls.vnt.get(favicon_link, out=final_favicon_path)
            if final_og_link:
                cls.vnt.get(final_og_link, out=final_og_image_path)
        elif media_path and row:
            final_favicon_path = os.path.join(settings.FAVICONS_STATIC,
                                              str(row.id) + '.ico')
            final_og_image_path = os.path.join(settings.FAVICONS_STATIC,
                                               str(row.id) + '.png')
            media_path_parent, out_title = os.path.split(media_path)
            if settings_row and settings_row.auto_summary and summary:
                row.summary = summary
            if settings_row and not tags_list:
                row.save()
            else:
                save_summary = True
            if not os.path.exists(final_favicon_path) and favicon_link:
                cls.vnt.get(favicon_link, out=final_favicon_path)
            if not os.path.exists(final_og_image_path) and final_og_link:
                cls.vnt.get(final_og_link, out=final_og_image_path)
        if save or save_text:
            if not os.path.exists(media_path_parent):
                os.makedirs(media_path_parent)
            if save:
                #req.save(req.request_object, media_path)
                cls.vnt.get(url_name, out=media_path)
            else:
                with open(media_path, 'w', encoding='utf-8') as fd:
                    fd.write(req.html)
            if settings_row and ext in ['.htm', '.html']:
                cls.convert_html_pdf(media_path_parent, settings_row, row,
                                     url_name, media_path, media_element)
        if settings_row and tags_list:
            if save_summary:
                cls.edit_tags(usr,
                              row.id,
                              ','.join(tags_list),
                              '',
                              old_row=row)
            else:
                cls.edit_tags(usr, row.id, ','.join(tags_list), '')
        return row.id

    @classmethod
    def save_in_binary_format(cls, usr, request, directory):
        url_list = []
        for key, value in request.FILES.items():
            title = value.name
            content = value.read()
            ext = None
            content_type = guess_type(title)[0]
            if content_type and content_type == 'text/plain':
                ext = '.txt'
            elif content_type:
                ext = guess_extension(content_type)
            logger.debug('{} ------ {}'.format(content_type, ext))
            if not ext:
                ext = '.bin'
            out_dir = ext[1:].upper()
            row = Library.objects.create(usr=usr,
                                         directory=directory,
                                         title=title,
                                         timestamp=timezone.now())

            out_title = str(row.id) + str(ext)
            media_dir = os.path.join(settings.ARCHIVE_LOCATION, out_dir)
            if not os.path.exists(media_dir):
                os.makedirs(media_dir)

            media_path_parent = os.path.join(media_dir, str(row.id))
            if not os.path.exists(media_path_parent):
                os.makedirs(media_path_parent)

            media_path = os.path.join(media_path_parent, out_title)
            row.media_path = media_path
            url = '/{}/{}/{}/archive'.format(usr.username, directory, row.id)
            row.url = url
            row.save()
            with open(media_path, 'wb') as fd:
                fd.write(content)
            url_list.append(url)

        return url_list

    @classmethod
    def convert_html_pdf(cls, media_path_parent, settings_row, row, url_name,
                         media_path, media_element):
        if settings_row.save_pdf:
            pdf = os.path.join(media_path_parent, str(row.id) + '.pdf')
            cmd = [
                'wkhtmltopdf', '--custom-header', 'User-Agent',
                settings.USER_AGENT, '--javascript-delay', '500',
                '--load-error-handling', 'ignore', url_name, pdf
            ]
            if settings.USE_XVFB:
                cmd = ['xvfb-run', '--auto-servernum'] + cmd
            if settings.USE_CELERY:
                cls.convert_to_pdf_png.delay(cmd)
            else:
                cls.vnt_task.function(cls.convert_to_pdf_png_task,
                                      cmd,
                                      onfinished=partial(
                                          cls.finished_processing, 'pdf'))
        if settings_row.save_png:
            png = os.path.join(media_path_parent, str(row.id) + '.png')
            cmd = [
                'wkhtmltoimage', '--quality',
                str(settings_row.png_quality), '--custom-header', 'User-Agent',
                settings.USER_AGENT, '--javascript-delay', '500',
                '--load-error-handling', 'ignore', url_name, png
            ]
            if settings.USE_XVFB:
                cmd = ['xvfb-run', '--auto-servernum'] + cmd
            if settings.USE_CELERY:
                cls.convert_to_pdf_png.delay(cmd)
            else:
                cls.vnt_task.function(cls.convert_to_pdf_png_task,
                                      cmd,
                                      onfinished=partial(
                                          cls.finished_processing, 'image'))
        if media_element or row.media_element:
            out = os.path.join(media_path_parent, str(row.id) + '.mp4')
            cmd_str = settings_row.download_manager.format(iurl=url_name,
                                                           output=out)
            cmd = cmd_str.split()
            logger.debug(cmd)
            if cmd and cmd[0] in settings.DOWNLOAD_MANAGERS_ALLOWED:
                if settings.USE_CELERY:
                    cls.convert_to_pdf_png.delay(cmd)
                else:
                    cls.vnt_task.function(cls.convert_to_pdf_png_task,
                                          cmd,
                                          onfinished=partial(
                                              cls.finished_processing,
                                              'media'))

    @classmethod
    def convert_html_pdf_with_chromium(cls,
                                       media_path_parent,
                                       settings_row,
                                       row,
                                       url_name,
                                       media_path,
                                       mode='pdf'):
        if mode == 'pdf':
            pdf = os.path.join(media_path_parent, str(row.id) + '.pdf')
            cmd = [
                'chromium', '--headless', '--disable-gpu',
                '--print-to-pdf={}'.format(pdf), url_name
            ]
            if not settings.CHROMIUM_SANDBOX:
                cmd.insert(1, '--no-sandbox')
            if settings.USE_CELERY:
                cls.convert_to_pdf_png.delay(cmd)
            else:
                cls.vnt_task.function(cls.convert_to_pdf_png_task,
                                      cmd,
                                      onfinished=partial(
                                          cls.finished_processing, 'pdf'))
        elif mode == 'dom':
            htm = os.path.join(media_path_parent, str(row.id) + '.htm')
            cmd = [
                'chromium', '--headless', '--disable-gpu', '--dump-dom',
                url_name
            ]
            if not settings.CHROMIUM_SANDBOX:
                cmd.insert(1, '--no-sandbox')
            if settings.USE_CELERY:
                cls.getdom_chromium.delay(cmd, htm)
            else:
                cls.vnt_task.function(cls.getdom_task_chromium,
                                      cmd,
                                      htm,
                                      onfinished=partial(
                                          cls.finished_processing, 'html'))

    @staticmethod
    def getdom_task_chromium(cmd, htm):
        if os.name == 'posix':
            output = subprocess.check_output(cmd)
        else:
            output = subprocess.check_output(cmd, shell=True)
        with open(htm, 'wb') as fd:
            fd.write(output)
        return True

    @task(name="convert-to-pdf-png")
    def getdom_chromium(cmd, htm):
        if os.name == 'posix':
            output = subprocess.check_output(cmd)
        else:
            output = subprocess.check_output(cmd, shell=True)
        with open(htm, 'wb') as fd:
            fd.write(output)

    @classmethod
    def finished_processing(cls, val, *args):
        logger.info('{}-->>>>finished--->>>{}'.format(val, args))

    @staticmethod
    def convert_to_pdf_png_task(cmd):
        if os.name == 'posix':
            subprocess.call(cmd)
        else:
            subprocess.call(cmd, shell=True)
        return True

    @task(name="convert-to-pdf-png")
    def convert_to_pdf_png(cmd):
        if os.name == 'posix':
            subprocess.call(cmd)
        else:
            subprocess.call(cmd, shell=True)

    @staticmethod
    def get_rows_by_directory(usr,
                              directory=None,
                              search=None,
                              search_mode='title'):

        usr_list = []

        if search and search_mode != 'dir':
            if search_mode == 'title':
                usr_list = Library.objects.filter(
                    usr=usr, title__icontains=search).order_by('-timestamp')
            elif search_mode == 'url':
                usr_list = Library.objects.filter(
                    usr=usr, url__icontains=search).order_by('-timestamp')
            elif search_mode == 'tag':
                usr_list = Library.objects.filter(
                    usr=usr, tags__icontains=search).order_by('-timestamp')
            elif search_mode == 'summary':
                usr_list = Library.objects.filter(
                    usr=usr, summary__icontains=search).order_by('-timestamp')
        else:
            if not directory and search and search_mode == 'dir':
                directory = search
            usr_list = Library.objects.filter(
                usr=usr, directory=directory).order_by('-timestamp')

        nusr_list = []
        for row in usr_list:
            if row.url:
                if not row.tags:
                    tags = []
                else:
                    tags = row.tags.split(',')
                nusr_list.append(
                    (row.title, row.url, row.id, row.timestamp, tags,
                     row.directory, row.media_path, row.media_element))
        return nusr_list

    @staticmethod
    def get_rows_by_tag(usr, tagname):
        tagobj = Tags.objects.filter(tag=tagname)
        directory = 'tag'
        usr_list = []
        if tagobj:
            usr_list = URLTags.objects.select_related('url_id').filter(
                usr_id=usr, tag_id=tagobj[0])
            udict = {}
            tag_list = [tagname]
            for i in usr_list:
                uid = i.url_id.url
                dirname = i.url_id.directory
                udict.update({
                    uid: [
                        i.url_id.title, uid, i.url_id.id, i.url_id.timestamp,
                        [tagname], dirname, i.url_id.media_path,
                        i.url_id.media_element
                    ]
                })
            usr_list = [tuple(value) for key, value in udict.items()]
            return usr_list
        else:
            return None

    @staticmethod
    def populate_usr_list(usr, usr_list, create_dict=False):
        if create_dict:
            nlist = {}
        else:
            nlist = []
        index = 1
        username = usr.username
        for title, url, idd, timestamp, tag, directory, media_path, media_element in usr_list:
            title = re.sub('_|-', ' ', title)
            title = re.sub('/', ' / ', title)
            base_dir = '/{}/{}/{}'.format(usr, directory, idd)
            base_remove = base_dir + '/remove'
            base_et = base_dir + '/edit-bookmark'
            move_single = base_dir + '/move-bookmark'
            move_multiple = base_dir + '/move-bookmark-multiple'
            base_eu = base_dir + '/edit-url'
            read_url = base_dir + '/read'
            if media_path and os.path.exists(media_path):
                archive_media = base_dir + '/archive'
            else:
                archive_media = url
            netloc = urlparse(url).netloc
            if len(netloc) > 20:
                netloc = netloc[:20] + '..'
            timestamp = timestamp.strftime("%d %b %Y")
            final_favicon_path = os.path.join(settings.FAVICONS_STATIC,
                                              str(idd) + '.ico')
            if os.path.exists(final_favicon_path):
                fav_path = settings.STATIC_URL + 'favicons/{}.ico'.format(idd)
            else:
                fav_path = ""
            if create_dict:
                nlist.update({
                    index: {
                        'title': title,
                        'netloc': netloc,
                        'url': url,
                        'edit-bookmark': base_et,
                        'remove-url': base_remove,
                        'timestamp': timestamp,
                        'tag': tag,
                        'move-bookmark': move_single,
                        'move-multi': move_multiple,
                        'usr': username,
                        'archive-media': archive_media,
                        'directory': directory,
                        'read-url': read_url,
                        'id': idd,
                        'fav-path': fav_path,
                        'media-element': media_element
                    }
                })
            else:
                nlist.append([
                    index, title, netloc, url, base_et, base_remove, timestamp,
                    tag, move_single, move_multiple, archive_media, directory,
                    read_url, idd, fav_path, media_element
                ])
            index += 1
        return nlist

    @staticmethod
    def format_link(lnk, url):
        ourl = urlparse(url)
        ourld = ourl.scheme + '://' + ourl.netloc
        if lnk and lnk != '#':
            if lnk.startswith('//'):
                lnk = ourl.scheme + ':' + lnk
            elif lnk.startswith('/'):
                lnk = ourld + lnk
            elif lnk.startswith('./'):
                lnk = url.rsplit('/', 1)[0] + lnk[1:]
            elif lnk.startswith('../'):
                lnk = url.rsplit('/', 2)[0] + lnk[2:]
            elif not lnk.startswith('http'):
                lnk = ourld + '/' + lnk
        return lnk

    @staticmethod
    def remove_url_link(usr, url_id=None, row=None):
        if row:
            url_id = row.id
        elif url_id:
            qlist = Library.objects.filter(usr=usr, id=url_id)
            if qlist:
                row = qlist[0]
        if row:
            media_path = row.media_path
            if media_path and os.path.exists(media_path):
                base_dir_url, file_name = os.path.split(media_path)
                base_dir_id, dir_id = os.path.split(base_dir_url)
                resource_dir = os.path.join(settings.ARCHIVE_LOCATION,
                                            'resources', str(url_id))
                if dir_id.isnumeric():
                    ndir_id = int(dir_id)
                    if ndir_id == url_id:
                        shutil.rmtree(base_dir_url)
                        logger.info('removing {}'.format(base_dir_url))
                    if os.path.exists(resource_dir):
                        shutil.rmtree(resource_dir)
                        logger.info('removing {}'.format(resource_dir))
            row.delete()

    @staticmethod
    def move_bookmarks(usr, request, url_id=None, single=True):
        msg = 'Nothing Moved'
        if single and url_id:
            move_to_dir = request.POST.get('move_to_dir', '')
            logger.debug('{} {}'.format(url_id, request.POST))
            if move_to_dir:
                Library.objects.filter(usr=usr,
                                       id=url_id).update(directory=move_to_dir)
                msg = 'Moved to {}'.format(move_to_dir)
        elif not single:
            move_to_dir = request.POST.get('move_to_dir', '')
            move_links = request.POST.get('move_links', '')
            if move_links:
                move_links_list = [
                    i.strip() for i in move_links.split(',') if i.strip()
                ]
            else:
                move_links_list = []
            if move_to_dir and move_links_list:
                for link in move_links_list:
                    if link.isnumeric():
                        link_id = int(link)
                        Library.objects.filter(
                            usr=usr, id=link_id).update(directory=move_to_dir)
            msg = 'Moved {1} links to {0}'.format(move_to_dir,
                                                  len(move_links_list))
        return msg

    @classmethod
    def group_links_actions(cls, usr, request, dirname, mode=None):
        msg = 'Nothing'
        links = request.POST.get('link_ids', '')
        link_tags = request.POST.get('link_tags', '')
        merge_dir = request.POST.get('merge_dir', '')
        if links:
            links_list = [i.strip() for i in links.split(',') if i.strip()]
        else:
            links_list = []
        if link_tags:
            tags_list = [i.strip() for i in link_tags.split(',') if i.strip()]
        else:
            tags_list = []

        qlist = UserSettings.objects.filter(usrid=usr)
        if qlist:
            set_row = qlist[0]
        else:
            set_row = None

        for link in links_list:
            if link.isnumeric():
                link_id = int(link)
                qset = Library.objects.filter(usr=usr, id=link_id)
                if qset:
                    row = qset[0]
                    if mode == 'archive':
                        cls.process_add_url(usr,
                                            row.url,
                                            dirname,
                                            archive_html=True,
                                            row=row,
                                            settings_row=set_row)
                    elif mode == 'tags' and tags_list:
                        cls.edit_tags(usr, row.id, ','.join(tags_list), '')
        if merge_dir and merge_dir != dirname and mode == 'merge':
            qlist = Library.objects.filter(usr=usr, directory=dirname)
            qlistm = Library.objects.filter(usr=usr, directory=merge_dir)
            merge_list = set([row.url for row in qlistm if row.url])
            for row in qlist:
                if not row.url or row.url in merge_list:
                    row.delete()
            Library.objects.filter(
                usr=usr, directory=dirname).update(directory=merge_dir)
        return msg

    @staticmethod
    def edit_bookmarks(usr, request, url_id):
        title = request.POST.get('new_title', '')
        nurl = request.POST.get('new_url', '')
        tags = request.POST.get('new_tags', '')
        tags_old = request.POST.get('old_tags', '')
        media_link = request.POST.get('media_link', '')
        logger.debug('{} {}'.format(url_id, request.POST))
        msg = 'Edited'
        if media_link and media_link == 'true':
            media_element = True
        else:
            media_element = False
        if title and nurl:
            Library.objects.filter(usr=usr, id=url_id).update(
                title=title, url=nurl, media_element=media_element)
            msg = msg + ' Title and Link'
        elif title:
            Library.objects.filter(usr=usr, id=url_id).update(
                title=title, media_element=media_element)
            msg = msg + ' Title'
        elif nurl:
            Library.objects.filter(usr=usr, id=url_id).update(
                url=nurl, media_element=media_element)
            msg = msg + ' Link'
        else:
            Library.objects.filter(
                usr=usr, id=url_id).update(media_element=media_element)

        if tags or tags_old:
            msg = DBAccess.edit_tags(usr, url_id, tags, tags_old)
        return msg

    @staticmethod
    def edit_tags(usr, url_id, tags, tags_old, old_row=None):
        tags_list = [i.lower().strip() for i in tags.split(',') if i.strip()]
        tags_list_old = [i.lower().strip() for i in tags_old.split(',') if i.strip()]
        tags_list_library = ','.join(set(tags_list))
        all_tags = Tags.objects.filter(tag__in=tags_list)

        tags_new_add = set(tags_list) - set(tags_list_old)
        tags_old_delete = set(tags_list_old) - set(tags_list)
        insert_list = []
        for tag in tags_list:
            if not all_tags.filter(tag=tag).exists():
                insert_list.append(Tags(tag=tag))
            else:
                logger.info('Tag: {} exists'.format(tag))
        if insert_list:
            Tags.objects.bulk_create(insert_list)
        if old_row:
            lib_obj = old_row
        else:
            lib_list = Library.objects.filter(usr=usr, id=url_id)
            lib_obj = lib_list[0]
        lib_obj.tags = tags_list_library
        lib_obj.save()
        tagins_list = []
        for tag in tags_new_add:
            tag_obj = Tags.objects.filter(tag=tag)
            tagid = URLTags.objects.filter(usr_id=usr,
                                           url_id=lib_obj,
                                           tag_id=tag_obj[0])
            if not tagid:
                tagins_list.append(
                    URLTags(usr_id=usr, url_id=lib_obj, tag_id=tag_obj[0]))
        if tagins_list:
            URLTags.objects.bulk_create(tagins_list)

        for tag in tags_old_delete:
            tag_obj = Tags.objects.filter(tag=tag)
            tagid = URLTags.objects.filter(usr_id=usr,
                                           url_id=lib_obj,
                                           tag_id=tag_obj[0])
            if tagid:
                URLTags.objects.filter(usr_id=usr,
                                       url_id=lib_obj,
                                       tag_id=tag_obj[0]).delete()
        msg = ('Edited Tags: new-tags-addition={}::old-tags-delete={}'.format(
            tags_new_add, tags_old_delete))
        logger.info(msg)
        return msg
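
The static format_link method in this example resolves relative links against a page URL; since it is a pure function (referenced later in this listing via the alias dbxs), its branches can be checked in isolation. A minimal sketch with made-up URLs, assuming the class above is importable as dbxs:

# Hypothetical usage sketch; 'dbxs' is the alias used for this class
# elsewhere in this listing, and all URLs below are made up.
page = 'https://example.com/blog/post/index.html'
assert dbxs.format_link('//cdn.example.com/a.css', page) == 'https://cdn.example.com/a.css'
assert dbxs.format_link('/static/a.css', page) == 'https://example.com/static/a.css'
assert dbxs.format_link('./img.png', page) == 'https://example.com/blog/post/img.png'
assert dbxs.format_link('../img.png', page) == 'https://example.com/blog/img.png'
assert dbxs.format_link('img.png', page) == 'https://example.com/img.png'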
Code example #28
0
class CustomRead:
    
    readable_format = [
        'text/plain', 'text/html', 'text/htm',
        'text/css', 'application/xhtml+xml',
        'application/xml', 'application/json',
    ]
    mtype_list = [
        'text/htm', 'text/html', 'text/plain'
    ]
    vnt_noblock = Vinanti(block=False, hdrs={'User-Agent': settings.USER_AGENT},
                          backend=settings.VINANTI_BACKEND,
                          max_requests=settings.VINANTI_MAX_REQUESTS)
    vnt = Vinanti(block=True, hdrs={'User-Agent':settings.USER_AGENT})
    fav_path = settings.FAVICONS_STATIC
    VIDEO_ID_DICT = OrderedDict()
    CACHE_FILE = os.path.join(settings.TMP_LOCATION, 'cache')
    ANNOTATION_SCRIPT = """
            var pageUri = function () {
                return {
                    beforeAnnotationCreated: function (ann) {
                        ann.uri = window.location.href;
                    }
                };
            };

            var app = new annotator.App();
            var loc = '/annotate';
            var csrftoken = getCookie('csrftoken');
            app.include(annotator.ui.main, {element: document.body});
            app.include(annotator.storage.http, {prefix: loc, headers: {"X-CSRFToken": csrftoken}});
            app.include(pageUri);
            app.start().then(function () {
                app.annotations.load({uri: window.location.pathname});
            });

            function getCookie(name) {
                var cookieValue = null;
                if (document.cookie && document.cookie !== '') {
                    var cookies = document.cookie.split(';');
                    for (var i = 0; i < cookies.length; i++) {
                        var cookie = jQuery.trim(cookies[i]);
                        // Does this cookie string begin with the name we want?
                        if (cookie.substring(0, name.length + 1) === (name + '=')) {
                            cookieValue = decodeURIComponent(cookie.substring(name.length + 1));
                            break;
                        }
                    }
                }
                return cookieValue;
            };
    """
    
    @classmethod
    def get_archived_file(cls, usr, url_id, mode='html', req=None, return_path=False):
        qset = Library.objects.filter(usr=usr, id=url_id)
        streaming_mode = False
        if not os.path.exists(settings.TMP_LOCATION):
            os.makedirs(settings.TMP_LOCATION)
        if qset:
            row = qset[0]
            media_path = row.media_path
            if mode in ['pdf', 'png', 'html'] and media_path:
                fln = media_path.rsplit('.', 1)[0]
                if mode == 'pdf':
                    media_path = fln + '.pdf'
                elif mode == 'png':
                    media_path = fln + '.png'
                elif mode == 'html':
                    media_path = fln + '.htm'
            elif mode == 'archive' and media_path:
                mdir, _ = os.path.split(media_path)
                filelist = os.listdir(mdir)
                mlist = []
                extset = set(['pdf', 'png', 'htm', 'html', 'json'])
                for fl in filelist:
                    ext = fl.rsplit('.', 1)
                    if ext and ext[-1] not in extset:
                        mlist.append(os.path.join(mdir, fl))
                for mfile in mlist:
                    if os.path.isfile(mfile) and os.stat(mfile).st_size:
                        media_path = mfile
                        streaming_mode = True
                        break
                if streaming_mode and req:
                    qlist = UserSettings.objects.filter(usrid=usr)
                    if qlist and not qlist[0].media_streaming:
                        streaming_mode = False
                        
            if media_path and os.path.exists(media_path):
                mtype = guess_type(media_path)[0]
                if not mtype:
                    mtype = 'application/octet-stream'
                ext = media_path.rsplit('.', 1)[-1] if '.' in media_path else ''
                if ext:
                    filename = row.title + '.' + ext
                    if '.' in row.title:
                        file_ext = row.title.rsplit('.', 1)[-1]
                        if ext == file_ext:
                            filename = row.title
                else:
                    filename = row.title + '.bin'
                if mtype in ['text/html', 'text/htm']:
                    data = cls.format_html(row, media_path)
                    return HttpResponse(data)
                elif streaming_mode:
                    if os.path.isfile(cls.CACHE_FILE):
                        with open(cls.CACHE_FILE, 'rb') as fd:
                            cls.VIDEO_ID_DICT = pickle.load(fd)
                    uid = str(uuid.uuid4())
                    uid = uid.replace('-', '')
                    while uid in cls.VIDEO_ID_DICT:
                        logger.debug("no unique ID, Generating again")
                        uid = str(uuid.uuid4())
                        uid = uid.replace('-', '')
                        time.sleep(0.01)
                    cls.VIDEO_ID_DICT.update({uid:[media_path, time.time()]})
                    cls.VIDEO_ID_DICT.move_to_end(uid, last=False)
                    if len(cls.VIDEO_ID_DICT) > settings.VIDEO_PUBLIC_LIST:
                        cls.VIDEO_ID_DICT.popitem()
                    with open(cls.CACHE_FILE, 'wb') as fd:
                        pickle.dump(cls.VIDEO_ID_DICT, fd)
                    if return_path:
                        title_slug = slugify(row.title, allow_unicode=True)
                        if settings.ROOT_URL_LOCATION:
                            root_loc = settings.ROOT_URL_LOCATION
                            if root_loc.startswith('/'):
                                root_loc = root_loc[1:]
                            return '{}/{}/getarchivedvideo/{}-{}'.format(root_loc, usr.username, title_slug, uid)
                        else:
                            return '{}/getarchivedvideo/{}-{}'.format(usr.username, title_slug, uid)
                    else:
                        return cls.get_archived_video(req, usr.username, uid)
                else:
                    response = FileResponse(open(media_path, 'rb'))
                    mtype = 'video/webm' if mtype == 'video/x-matroska' else mtype
                    response['mimetype'] = mtype
                    response['content-type'] = mtype
                    response['content-length'] = os.stat(media_path).st_size
                    filename = filename.replace(' ', '.')
                    logger.info('{} , {}'.format(filename, mtype))
                    if not cls.is_human_readable(mtype) and not streaming_mode:
                        response['Content-Disposition'] = 'attachment; filename="{}"'.format(quote(filename))
                    return response
            else:
                back_path = req.path_info.rsplit('/', 1)[0] + '/read'
                return render(req, 'archive_not_found.html', {'path':back_path})
        else:
            return HttpResponse(status=404)
    
    @classmethod
    def get_archived_video(cls, request, username, video_id):
        if video_id in cls.VIDEO_ID_DICT:
            media_path, ltime = cls.VIDEO_ID_DICT.get(video_id)
            logger.debug('{} {}'.format(media_path, ltime))
            if time.time() - ltime <= settings.VIDEO_ID_EXPIRY_LIMIT*3600:
                if os.path.isfile(media_path):
                    mtype = guess_type(media_path)[0]
                    if not mtype:
                        mtype = 'application/octet-stream'
                    range_header = request.META.get('HTTP_RANGE', '').strip()
                    range_match = settings.RANGE_REGEX.match(range_header)
                    size = os.stat(media_path).st_size
                    if range_match:
                        first_byte, last_byte = range_match.groups()
                        first_byte = int(first_byte) if first_byte else 0
                        last_byte = int(last_byte) if last_byte else size - 1
                        if last_byte >= size:
                            last_byte = size - 1
                        length = last_byte - first_byte + 1
                        response = StreamingHttpResponse(
                            RangeFileResponse(open(media_path, 'rb'), offset=first_byte,
                            length=length), status=206, content_type=mtype
                        )
                        response['Content-Length'] = str(length)
                        response['Content-Range'] = 'bytes {}-{}/{}'.format(first_byte, last_byte, size)
                    else:
                        response = StreamingHttpResponse(FileResponse(open(media_path, 'rb')))
                        response['content-length'] = size
                    mtype = 'video/webm' if mtype == 'video/x-matroska' else mtype
                    response['content-type'] = mtype
                    response['mimetype'] = mtype
                    response['Accept-Ranges'] = 'bytes'
                    return response
        return HttpResponse(status=404)
    
    @classmethod
    def generate_archive_media_playlist(cls, server, usr, directory):
        qset = Library.objects.filter(usr=usr, directory=directory)
        pls_txt = '#EXTM3U\n'
        extset = set(['pdf', 'png', 'htm', 'html'])
        if not os.path.exists(settings.TMP_LOCATION):
            os.makedirs(settings.TMP_LOCATION)
        if os.path.isfile(cls.CACHE_FILE):
            with open(cls.CACHE_FILE, 'rb') as fd:
                cls.VIDEO_ID_DICT = pickle.load(fd)
        for row in qset:
            streaming_mode = False
            media_path = row.media_path
            media_element = row.media_element
            title = row.title
            if media_path and media_element:
                mdir, _ = os.path.split(media_path)
                filelist = os.listdir(mdir)
                mlist = []
                for fl in filelist:
                    ext = fl.rsplit('.', 1)
                    if ext and ext[-1] not in extset:
                        mlist.append(os.path.join(mdir, fl))
                for mfile in mlist:
                    if os.path.isfile(mfile) and os.stat(mfile).st_size:
                        media_path = mfile
                        streaming_mode = True
                        break
            if media_path and os.path.exists(media_path):
                mtype = guess_type(media_path)[0]
                if not mtype:
                    mtype = 'application/octet-stream'
                if streaming_mode:
                    uid = str(uuid.uuid4())
                    uid = uid.replace('-', '')
                    while uid in cls.VIDEO_ID_DICT:
                        logger.debug("no unique ID, Generating again")
                        uid = str(uuid.uuid4())
                        uid = uid.replace('-', '')
                        time.sleep(0.01)
                    cls.VIDEO_ID_DICT.update({uid:[media_path, time.time()]})
                    cls.VIDEO_ID_DICT.move_to_end(uid, last=False)
                    if len(cls.VIDEO_ID_DICT) > settings.VIDEO_PUBLIC_LIST:
                        cls.VIDEO_ID_DICT.popitem()
                    title_slug = slugify(title, allow_unicode=True)
                    if settings.ROOT_URL_LOCATION:
                        root_loc = settings.ROOT_URL_LOCATION
                        if root_loc.startswith('/'):
                            root_loc = root_loc[1:]
                        return_path = '{}/{}/{}/getarchivedvideo/{}-{}'.format(server, root_loc,
                                                                               usr.username,
                                                                               title_slug, uid)
                    else:
                        return_path = '{}/{}/getarchivedvideo/{}-{}'.format(server, usr.username, title_slug, uid)
                    pls_txt = pls_txt+'#EXTINF:0, {0}\n{1}\n'.format(title, return_path)
        with open(cls.CACHE_FILE, 'wb') as fd:
            pickle.dump(cls.VIDEO_ID_DICT, fd)
        uid = str(uuid.uuid4())
        uid = uid.replace('-', '')
        plfile = os.path.join(settings.TMP_LOCATION, uid)
        if not os.path.isfile(plfile):
            with open(plfile, 'wb') as fd:
                pickle.dump(pls_txt, fd)
        pls_path = '{}/{}/getarchivedplaylist/{}/playlist/{}'.format(
            settings.ROOT_URL_LOCATION, usr.username, directory, uid)
        logger.debug(pls_path)
        return pls_path
        
    @classmethod
    def read_customized(cls, usr, url_id, mode='read', req=None):
        qlist = Library.objects.filter(usr=usr, id=url_id).select_related()
        data = b"<html>Not Available</html>"
        mtype = 'text/html'
        if qlist:
            row = qlist[0]
            media_path = row.media_path
            if mode in ['read-default', 'read-dark', 'read-light', 'read-gray']:
                if mode == 'read-dark':
                    row.reader_mode = UserSettings.DARK
                elif mode == 'read-light':
                    row.reader_mode = UserSettings.LIGHT
                elif mode == 'read-gray':
                    row.reader_mode = UserSettings.GRAY
                else:
                    row.reader_mode = UserSettings.WHITE
                row.save()
            if media_path and os.path.exists(media_path):
                mtype = guess_type(media_path)[0]
                if mtype in cls.mtype_list or media_path.endswith(".bin") or media_path.endswith(".note"):
                    if media_path.endswith(".bin"):
                        html = media_path.rsplit(".", 1)[0] + ".htm"
                        if os.path.exists(html):
                            media_path = html
                            mtype = "text/html"
                    data = cls.format_html(row, media_path,
                                           custom_html=True)
                    if mtype == 'text/plain' or media_path.endswith(".bin") or media_path.endswith(".note"):
                        mtype = 'text/html'
            elif row.url:
                data = cls.get_content(row, url_id, media_path)
        response = HttpResponse()
        response['mimetype'] = mtype
        response['content-type'] = mtype
        response.write(data)
        return response

    @classmethod
    def read_customized_note(cls, usr, url_id, mode='read-note', req=None):
        qlist = Library.objects.filter(usr=usr, id=url_id).select_related()
        data = b"<html>Not Available</html>"
        mtype = 'text/html'
        if qlist:
            row = qlist[0]
            media_path = row.media_path
            if media_path and os.path.exists(media_path):
                data = cls.format_note(row, media_path)
                mtype = 'text/html'
        response = HttpResponse()
        response['mimetype'] = mtype
        response['content-type'] = mtype
        response.write(data)
        return response

    @classmethod
    def save_customized_note(cls, usr, url_id, mode='read-note', req=None):
        text = req.POST.get('edited_note', '')
        logger.debug(text)
        qlist = Library.objects.filter(usr=usr, id=url_id).select_related()
        data = b"<html>Not Available</html>"
        mtype = 'text/html'
        if qlist:
            row = qlist[0]
            media_path = row.media_path
            if media_path and os.path.exists(media_path):
                with open(media_path, "w") as f:
                    f.write(text)
                mtype = 'text/html'
        response = HttpResponse()
        response['mimetype'] = mtype
        response['content-type'] = mtype
        response.write(bytes("Saved", "utf-8"))
        return response

    @staticmethod
    def format_note(row, media_path):
        with open(media_path, "r") as fd:
            content = fd.read()
        if row:
            if '/' in row.directory:
                base_dir = '{}/{}/subdir/{}/{}'.format(settings.ROOT_URL_LOCATION,
                                                       row.usr.username, row.directory,
                                                       row.id)
            else:
                base_dir = '{}/{}/{}/{}'.format(settings.ROOT_URL_LOCATION,
                                                row.usr.username, row.directory,
                                                row.id)
            read_url = base_dir + '/read'
            read_pdf = base_dir + '/read-pdf'
            read_png = base_dir + '/read-png'
            read_html = base_dir + '/read-html'
        else:
            read_url = read_pdf = read_png = read_html = '#'
        card_bg = ''
        card_tab = ''
        if row:
            if row.reader_mode == UserSettings.DARK:
                card_bg = 'text-white bg-dark'
                card_tab = 'bg-dark border-dark text-white'
            elif row.reader_mode == UserSettings.LIGHT:
                card_bg = 'bg-light'
            elif row.reader_mode == UserSettings.GRAY:
                card_bg = 'text-white bg-secondary'
                card_tab = 'bg-secondary border-secondary text-white'
        template = """
        <html>
            <head>
                <meta charset="utf-8">
                <title>{title}</title>
                <link rel="stylesheet" href="/static/css/bootstrap.min.css">
                <link rel="stylesheet" href="/static/css/bootstrap.min.css.map">
                <script src="/static/js/jquery-3.3.1.min.js"></script>
                <script src="/static/js/popper.min.js"></script>
                <script src="/static/js/bootstrap.min.js"></script>
                <link rel="stylesheet" href="/static/css/summernote-bs4.css">
                <script src="/static/js/summernote-bs4.js"></script>
                <meta name="viewport" content="width=device-width, initial-scale=1.0">
                <meta name="referrer" content="no-referrer">
            </head>
        <body>
            <div class="row px-4" id="summernote"></div>
            <div class="row px-4 py-4">
                <button id="save" class="btn btn-primary" onclick="save()" type="button"> Save </button>
            </div>
            <script> $('#summernote').summernote({{placeholder: "Text..", tabsize: 10, height: 500}});
            $("#summernote").summernote("code", `{content}`);
            var save = function() {{
              var markup = $('#summernote').summernote('code');
              var formdata = new FormData;
                formdata.append('edited_note', markup);
                var csrftoken = getCookie('csrftoken');
                var client = new postRequestUpload();
                var api_link = window.location.href + '-save';
                client.post(api_link, formdata, csrftoken, function(response) {{
                    console.log(response);
                }})
            }};

            function getCookie(name) {{
                var cookieValue = null;
                if (document.cookie && document.cookie !== '') {{
                    var cookies = document.cookie.split(';');
                    for (var i = 0; i < cookies.length; i++) {{
                        var cookie = jQuery.trim(cookies[i]);
                        // Does this cookie string begin with the name we want?
                        if (cookie.substring(0, name.length + 1) === (name + '=')) {{
                            cookieValue = decodeURIComponent(cookie.substring(name.length + 1));
                            break;
                        }}
                    }}
                }}
                return cookieValue;
            }};

            var postRequestUpload = function() {{
                this.post = function(url, params, token, callbak) {{
                    var http_req = new XMLHttpRequest();
                    http_req.onreadystatechange = function() {{ 
                        if (http_req.readyState == 4 && http_req.status == 200)
                            {{callbak(http_req.responseText);}}
                    }}
                    http_req.open( "POST", url, true );
                    http_req.setRequestHeader("X-CSRFToken", token);
                    http_req.send(params);
                }}
            }};
             </script>
            
        </body>
        </html>
        """.format(title="Notes", content=content)
        return template
    
    @classmethod
    def get_content(cls, row, url_id, media_path):
        data = ""
        req = cls.vnt.get(row.url)
        media_path_parent, _ = os.path.split(media_path)
        if not os.path.exists(media_path_parent):
            os.makedirs(media_path_parent)
        if req and req.content_type and req.html:
            mtype = req.content_type.split(';')[0].strip()
            if mtype in cls.mtype_list:
                content = req.html
                with open(media_path, 'w') as fd:
                    fd.write(content)
                data = cls.format_html(
                    row, media_path, content=content,
                    custom_html=True
                )
                fav_nam = str(url_id) + '.ico'
                final_favicon_path = os.path.join(cls.fav_path, fav_nam)
                if not os.path.exists(final_favicon_path):
                    cls.get_favicon_link(req.html, row.url,
                                         final_favicon_path)
        return data
    
    @classmethod
    def format_html(cls, row, media_path, content=None, custom_html=False):
        media_dir, file_path = os.path.split(media_path)
        resource_dir = os.path.join(settings.ARCHIVE_LOCATION, 'resources', str(row.id))
        resource_link = '{}/{}/{}/{}/{}'.format(settings.ROOT_URL_LOCATION,
                                                row.usr.username, row.directory,
                                                str(row.id), 'resources')
        if not os.path.exists(resource_dir):
            os.makedirs(resource_dir)
        if not content:
            content = ""
            with open(media_path, encoding='utf-8', mode='r') as fd:
                content = fd.read()
        soup = BeautifulSoup(content, 'lxml')
        for script in soup.find_all('script'):
            script.decompose()
        url_path = row.url
        ourl = urlparse(url_path)
        ourld = ourl.scheme + '://' + ourl.netloc
        link_list = soup.find_all(['a', 'link', 'img'])
        for link in link_list:
            if link.name == 'img':
                lnk = link.get('src', '')
            else:
                lnk = link.get('href', '')
            if lnk and lnk != '#':
                if link.name == 'img' or (link.name == 'link' and '.css' in lnk):
                    lnk = dbxs.format_link(lnk, url_path)
                    lnk_bytes = bytes(lnk, 'utf-8')
                    h = hashlib.sha256(lnk_bytes)
                    lnk_hash = h.hexdigest()
                    if link.name == 'img':
                        link['src'] = resource_link + '/' + lnk_hash
                        if custom_html:
                            link['class'] = 'img-thumbnail'
                    else:
                        lnk_hash = lnk_hash + '.css'
                        link['href'] = resource_link + '/' + lnk_hash
                    file_image = os.path.join(resource_dir, lnk_hash)
                    if not os.path.exists(file_image):
                        cls.vnt_noblock.get(lnk, out=file_image)
                        logger.info('getting file: {}, out: {}'.format(lnk, file_image))
                elif lnk.startswith('http'):
                    pass
                else:
                    nlnk = dbxs.format_link(lnk, url_path)
                    if link.name == 'img':
                        link['src'] = nlnk
                        if custom_html:
                            link['class'] = 'img-thumbnail'
                    else:
                        link['href'] = nlnk
        if custom_html:
            ndata = soup.prettify()
            if soup.title:
                title = soup.title.text
            else:
                title = row.url.rsplit('/')[-1]
            data = Document(ndata)
            data_sum = data.summary()
            if data_sum:
                nsoup = BeautifulSoup(data_sum, 'lxml')
                if nsoup.text.strip():
                    data = cls.custom_template(title, nsoup.prettify(), row)
                else:
                    data = cls.custom_soup(ndata, title, row)
            else:
                data = cls.custom_soup(ndata, title, row)
        else:
            new_tag = soup.new_tag("script", src="/static/js/jquery-3.3.1.min.js")
            soup.find("body").append(new_tag)
            new_tag = soup.new_tag("script", src="/static/js/annotator.min.js")
            soup.find("body").append(new_tag)
            new_tag = soup.new_tag("script")
            new_tag.append(cls.ANNOTATION_SCRIPT)
            soup.find("body").append(new_tag)
            data = soup.prettify()
        return bytes(data, 'utf-8')
        
    
    @classmethod
    def custom_template(cls, title, content, row):
        if row:
            if '/' in row.directory:
                base_dir = '{}/{}/subdir/{}/{}'.format(settings.ROOT_URL_LOCATION,
                                                        row.usr.username, row.directory,
                                                        row.id)
            else:
                base_dir = '{}/{}/{}/{}'.format(settings.ROOT_URL_LOCATION,
                                                row.usr.username, row.directory,
                                                row.id)
            read_url = base_dir + '/read'
            read_pdf = base_dir + '/read-pdf'
            read_png = base_dir + '/read-png'
            read_html = base_dir + '/read-html'
        else:
            read_url = read_pdf = read_png = read_html = '#'
        card_bg = ''
        card_tab = ''
        if row:
            if row.reader_mode == UserSettings.DARK:
                card_bg = 'text-white bg-dark'
                card_tab = 'bg-dark border-dark text-white'
            elif row.reader_mode == UserSettings.LIGHT:
                card_bg = 'bg-light'
            elif row.reader_mode == UserSettings.GRAY:
                card_bg = 'text-white bg-secondary'
                card_tab = 'bg-secondary border-secondary text-white'
        template = """
        <html>
            <head>
                <meta charset="utf-8">
                <title>{title}</title>
                <link rel="stylesheet" href="/static/css/bootstrap.min.css">
                <meta name="viewport" content="width=device-width, initial-scale=1.0">
                <meta name="referrer" content="no-referrer">
            </head>
        <body>
            <div class="container-fluid">
                <div class="row">
                    <div class="col-sm {card_bg}">
                        <div class='card text-left {card_bg} mb-3'>
                            <div class='card-header'>
                                <ul class="nav nav-tabs card-header-tabs">
                                    <li class="nav-item">
                                        <a class="nav-link {card_tab} active" href="{read_url}">HTML</a>
                                    </li>
                                    <li class="nav-item">
                                        <a class="nav-link" href="{read_html}">Original</a>
                                    </li>
                                    <li class="nav-item">
                                        <a class="nav-link" href="{read_pdf}">PDF</a>
                                    </li>
                                    <li class="nav-item">
                                        <a class="nav-link" href="{read_png}">PNG</a>
                                    </li>
                                </ul>
                            </div>
                            
                            <div class='card-body'>
                                <h5 class="card-title">{title}</h5>
                                {content}
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        <script src="/static/js/jquery-3.3.1.min.js"></script>
        <script src="/static/js/annotator.min.js"></script>
        <script>{annot_script}</script>
        </body>
        </html>
        """.format(title=title, content=content,
                   read_url=read_url, read_pdf=read_pdf,
                   read_png=read_png, read_html=read_html,
                   card_bg=card_bg, card_tab=card_tab,
                   annot_script=cls.ANNOTATION_SCRIPT)
        return template

    @classmethod
    def custom_soup(cls, data, title, row=None):
        soup = BeautifulSoup(data, 'lxml')
        text_result = soup.find_all(text=True)
        final_result = []
        for elm in text_result:
            ntag = ''
            ptag = elm.parent.name
            if ptag == 'a':
                href = elm.parent.get('href')
                ntag = '<a href="{}">{}</a>'.format(href, elm)
            elif ptag in ['body', 'html', '[document]', 'img']:
                pass
            elif ptag == 'p':
                ntag = '<p class="card-text">{}</p>'.format(elm)
            elif ptag == 'span':
                ntag = '<span class="card-text">{}</span>'.format(elm)
            elif '\n' in elm:
                ntag = '<br/>'
            else:
                ntag = '<{tag}>{text}</{tag}>'.format(tag=ptag, text=elm)
            if ntag:
                final_result.append(ntag)
        result = ''.join(final_result)
        result = re.sub(r'(<br/>)+', '<br/>', result)
        content = cls.custom_template(title, result, row)
        return content
    
    @classmethod
    def get_favicon_link(cls, data, url_name, final_favicon_path):
        soup = BeautifulSoup(data, 'lxml')
        favicon_link = ''
        if not os.path.exists(final_favicon_path):
            links = soup.find_all('link')
            ilink = soup.find('link', {'rel':'icon'})
            slink = soup.find('link', {'rel':'shortcut icon'})
            if ilink:
                favicon_link = dbxs.format_link(ilink.get('href'), url_name)
            elif slink:
                favicon_link = dbxs.format_link(slink.get('href'), url_name)
            else:
                for i in links:
                    href = i.get('href')
                    if href and '.ico' in href:
                        favicon_link = dbxs.format_link(href, url_name)
                if not favicon_link:
                    urlp = urlparse(url_name)
                    favicon_link = urlp.scheme + '://' + urlp.netloc + '/favicon.ico'
            if favicon_link:
                cls.vnt_noblock.get(favicon_link, out=final_favicon_path)
    
    @classmethod
    def is_human_readable(cls, mtype):
        human_readable = False
        if mtype in cls.readable_format:
            human_readable = True
        return human_readable
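
The resource caching in format_html above names each downloaded asset after the SHA-256 hash of its absolute URL, appending '.css' for stylesheets. A self-contained sketch of that naming scheme, using a made-up link:

# Sketch of the cache-file naming used by format_html; the URL is made up.
import hashlib

lnk = 'https://example.com/static/style.css'
lnk_hash = hashlib.sha256(bytes(lnk, 'utf-8')).hexdigest()
print(lnk_hash + '.css')  # stored under ARCHIVE_LOCATION/resources/<row.id>/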
Code example #29
0
 def __init__(self, hdrs):
     if hdrs:
         self.hdrs = hdrs
     else:
         self.hdrs = {'User-Agent': 'Mozilla/5.0'}
     self.vnt = Vinanti(block=False, hdrs=self.hdrs, timeout=10)
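
A minimal sketch of driving the non-blocking client built above. As in the other examples in this listing, Vinanti hands the response object to the callback as the last positional argument; the import path is an assumption based on the library's usual layout:

# Hypothetical usage sketch for the non-blocking client.
from vinanti import Vinanti  # assumed import path

def on_done(*args):
    resp = args[-1]  # Vinanti appends the response object last
    print(resp.content_type)

vnt = Vinanti(block=False, hdrs={'User-Agent': 'Mozilla/5.0'}, timeout=10)
vnt.get('http://httpbin.org/get', onfinished=on_done)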
Code example #31
0
 def test_session_mix_aio(self):
     data_dict = {'hello': 'world', 'world': 'hello'}
     vnt = Vinanti(block=self.block,
                   onfinished=hello,
                   hdrs=self.hdr,
                   method='POST',
                   data=data_dict,
                   group_task=True,
                   backend='aiohttp')
     vnt.post('http://www.httpbin.org/post')
     vnt.add('http://www.httpbin.org/post', data={'clrs': 'algo'})
     vnt.add('http://www.httpbin.org/post', data={'ast': 'OS'})
     vnt.add('http://www.httpbin.org/post',
             data={'tma': 'calc'},
             hdrs={'user-agent': 'curl'})
     vnt.add('http://www.httpbin.org/get',
             method='GET',
             params={
                 'hp': 'ca',
                 'ahu': 'tfcs'
             })
     vnt.add('http://httpbin.org/get', method='HEAD', onfinished=namaste)
     vnt.add('http://httpbin.org/ip', method='GET', onfinished=namaste)
     vnt.start()
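
Elsewhere in this listing (CustomRead.get_content) the same client is used in blocking mode, where get() returns the response directly instead of firing a callback. A minimal sketch of that style, assuming the response exposes content_type as above:

# Hypothetical blocking-mode sketch, mirroring CustomRead.get_content.
vnt = Vinanti(block=True, hdrs={'User-Agent': 'Mozilla/5.0'})
req = vnt.get('http://httpbin.org/get')
if req and req.content_type:
    print(req.content_type.split(';')[0].strip())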
Code example #32
0
File: WebComics.py Project: kanishka-linux/WebComics
class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
        global screen_height, screen_width
        MainWindow.setObjectName(_fromUtf8("MainWindow"))
        MainWindow.setEnabled(True)
        MainWindow.resize(800, 400)
        MainWindow.setMinimumSize(QtCore.QSize(0, 0))
        MainWindow.setMaximumSize(QtCore.QSize(900, 400))
        icon = QtGui.QIcon.fromTheme(_fromUtf8(""))
        MainWindow.setWindowIcon(icon)
        MainWindow.setLayoutDirection(QtCore.Qt.LeftToRight)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName(_fromUtf8("centralwidget"))
        self.verticalLayout = QtWidgets.QVBoxLayout(self.centralwidget)
        self.verticalLayout.setObjectName(_fromUtf8("verticalLayout"))
        self.tabWidget = QtWidgets.QTabWidget(self.centralwidget)
        self.tabWidget.setObjectName(_fromUtf8("tabWidget"))
        self.tab = MyWidget(MainWindow)
        self.tab.setObjectName(_fromUtf8("tab"))
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.tab)
        self.horizontalLayout.setObjectName(_fromUtf8("horizontalLayout"))
        self.label = QtWidgets.QLabel(self.tab)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Expanding)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.label.sizePolicy().hasHeightForWidth())
        self.label.setSizePolicy(sizePolicy)
        self.label.setMaximumSize(QtCore.QSize(900, 290))
        self.label.setText(_fromUtf8(""))
        self.label.setScaledContents(True)
        self.label.setObjectName(_fromUtf8("label"))
        self.horizontalLayout.addWidget(self.label)
        self.tabWidget.addTab(self.tab, _fromUtf8(""))
        self.tab_2 = QtWidgets.QWidget()
        self.tab_2.setObjectName(_fromUtf8("tab_2"))
        self.tabWidget.addTab(self.tab_2, _fromUtf8(""))
        self.verticalLayout.addWidget(self.tabWidget)
        self.frame = QtWidgets.QFrame(self.centralwidget)
        self.frame.setMinimumSize(QtCore.QSize(782, 60))
        self.frame.setMaximumSize(QtCore.QSize(782, 16777215))
        self.frame.setContextMenuPolicy(QtCore.Qt.ActionsContextMenu)
        self.frame.setFrameShape(QtWidgets.QFrame.NoFrame)
        self.frame.setFrameShadow(QtWidgets.QFrame.Raised)
        self.frame.setLineWidth(0)
        self.frame.setObjectName(_fromUtf8("frame"))
        self.prev = QtWidgets.QPushButton(self.frame)
        self.prev.setGeometry(QtCore.QRect(340, 20, 41, 21))
        self.prev.setObjectName(_fromUtf8("prev"))
        self.next = QtWidgets.QPushButton(self.frame)
        self.next.setGeometry(QtCore.QRect(410, 20, 41, 20))
        self.next.setObjectName(_fromUtf8("next"))
        self.date = QtWidgets.QDateEdit(self.frame)
        self.date.setGeometry(QtCore.QRect(620, 20, 110, 26))
        self.date.setCalendarPopup(True)
        self.date.setObjectName(_fromUtf8("date"))
        self.go = QtWidgets.QPushButton(self.frame)
        self.go.setGeometry(QtCore.QRect(740, 20, 20, 20))
        self.go.setObjectName(_fromUtf8("go"))
        self.btn1 = QtWidgets.QComboBox(self.frame)
        self.btn1.setGeometry(QtCore.QRect(30, 15, 110, 31))
        self.btn1.setObjectName(_fromUtf8("btn1"))
        
        self.btn1.addItem(_fromUtf8(""))
        self.btn1.addItem(_fromUtf8(""))
        self.btn1.addItem(_fromUtf8(""))
        self.btn1.addItem(_fromUtf8(""))
        self.btn2 = QtWidgets.QPushButton(self.frame)
        self.btn2.setGeometry(QtCore.QRect(160, 20, 61, 21))
        self.btn2.setObjectName(_fromUtf8("btn2"))
        self.btnM = QtWidgets.QPushButton(self.frame)
        self.btnM.setGeometry(QtCore.QRect(240, 20, 51, 21))
        self.btnM.setObjectName(_fromUtf8("btnM"))
        
        self.verticalLayout.addWidget(self.frame)
        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 800, 24))
        self.menubar.setObjectName(_fromUtf8("menubar"))
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName(_fromUtf8("statusbar"))
        MainWindow.setStatusBar(self.statusbar)
        
        self.scrollArea = QtGuiQWidgetScroll()
        self.scrollArea.setWidgetResizable(True)
        self.scrollArea.setMaximumSize(screen_width, screen_height-60)
        self.scrollArea.setObjectName(_fromUtf8("scrollArea"))
        self.scrollAreaWidgetContents = QtWidgets.QWidget()
        self.scrollAreaWidgetContents.setObjectName(_fromUtf8("scrollAreaWidgetContents"))
        self.vBox = QtWidgets.QVBoxLayout(self.scrollAreaWidgetContents)
        self.scrollArea.setWidget(self.scrollAreaWidgetContents)
        self.labelExp = QtWidgets.QLabel(self.scrollAreaWidgetContents)
        self.labelExp.setObjectName(_fromUtf8("labelExp"))
        self.labelExp.setScaledContents(True)
        self.vBox.addWidget(self.labelExp)
        
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.tab_2)
        self.horizontalLayout_2.setObjectName(_fromUtf8("horizontalLayout_2"))
        self.listComics = QtWidgets.QListWidget(self.tab_2)
        self.listComics.setObjectName(_fromUtf8("listComics"))
        self.horizontalLayout_2.addWidget(self.listComics)
        self.retranslateUi(MainWindow)
        self.tabWidget.setCurrentIndex(0)
        self.btn1.currentIndexChanged['QString'].connect(self.comics)
        self.listComics.itemDoubleClicked['QListWidgetItem*'].connect(self.addComics)
        self.prev.clicked.connect(self.previous)
        self.next.clicked.connect(self.nxt)
        self.go.clicked.connect(self.goto_direct)
        self.btn2.clicked.connect(partial(self.zoom_image))
        self.btnM.clicked.connect(self.loadMoreComics)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)
        self.hdrs = {'User-Agent':USER_AGENT}
        self.vnt = Vinanti(block=False, hdrs=self.hdrs)
        self.base_url = None
        self.name = None
        self.picn = None
        self.home_comics = None
        self.cur_date = None
        
    def retranslateUi(self, MainWindow):
        MainWindow.setWindowTitle(_translate("MainWindow", "Read Comics", None))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab), _translate("MainWindow", "Tab 1", None))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_2), _translate("MainWindow", "Tab 2", None))
        self.btn1.setItemText(0, _translate("MainWindow", "Select", None))
        self.btn1.setItemText(1, _translate("MainWindow", "Calvin", None))
        self.btn1.setItemText(2, _translate("MainWindow", "Garfield", None))
        self.btn1.setItemText(3, _translate("MainWindow", "OneBigHappy", None))
        self.date.setDisplayFormat(_translate("MainWindow", "yyyy/MM/dd", None))
        self.next.setText(_translate("MainWindow", "N", None))
        self.prev.setText(_translate("MainWindow", "P", None))
        self.go.setText(_translate("MainWindow", "Go", None))
        self.btn2.setText(_translate("MainWindow", "Original", None))
        self.btnM.setText(_translate("MainWindow", "More", None))
        self.btn2.setToolTip(_translate("MainWindow", "<html><head/><body><p>Show Original Image Size</p></body></html>", None))
        
    def loadMoreComics(self):
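        # Fill Tab 2 with the catalogue of comics scraped from the
        # GoComics A-to-Z index; entries already saved in config.txt
        # show up with a leading '#'.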
        self.tabWidget.setCurrentIndex(1)
        comics_list = os.path.join(home_comics, 'config.txt')
        with open(comics_list, 'r') as f:
            lines = f.readlines()
        lines = [i.strip() for i in lines if i.strip()]
        if self.listComics.count() == 0:
            MainWindow.setWindowTitle('Wait..')
            url = "http://www.gocomics.com/comics/a-to-z"
            self.vnt.get(url, onfinished=partial(self.more_comics, lines))
                            
    def more_comics(self, lines, *args):
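        # Callback for the A-to-Z index fetch: keep links whose last
        # path component is numeric (strip pages) and add their comic
        # slug to the list, '#'-prefixed if it is already subscribed.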
        MainWindow.setWindowTitle('Select Comics')
        content = args[-1].html
        soup = BeautifulSoup(content, 'html.parser')
        links = soup.find_all('a')
        for i in links:
            j = i.get('href')
            if j:
                last = j.rsplit('/')[-1]
                if last.isnumeric():
                    karr = j.split('/')
                    if len(karr) > 1:
                        k = karr[1]
                    else:
                        k = None
                    if k in lines:
                        self.listComics.addItem('#'+k)
                    elif k:
                        self.listComics.addItem(k)
    
    def fetch_comics(self, base_url, dt):
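        # Strips are cached on disk as <name>-<yyyy-mm-dd>.jpg; hit the
        # network only when no cached copy exists for the given date.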
        t = dt.replace('/', '-')
        picn = os.path.join(self.home_comics, '{}-{}.jpg'.format(self.name, t))
        logger.debug(picn)
        self.picn = picn
        if not os.path.isfile(picn):
            MainWindow.setWindowTitle('Wait..')
            url = base_url + dt
            self.vnt.get(url, onfinished=partial(self.process_page, dt, picn))
            logger.debug(url)
        else:
            img = QtGui.QPixmap(picn, "1")
            self.label.setPixmap(img)
            title = '{} {}'.format(self.name, dt)
            MainWindow.setWindowTitle(title)
            self.scrollArea.setWindowTitle(title)
            if not self.scrollArea.isHidden():
                self.zoom_image(picn)
        
    def process_page(self, *args):
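        # Callback for the strip page fetch: pull the image URL out of
        # the page's data-image attribute and download it to picn.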
        content = args[-1].html
        m = re.findall('data-image="http[^"]*', content)
        logger.debug(m)
        dt = args[0]
        picn = args[1]
        m = [i.replace('data-image="', '') for i in m]
        logger.debug(m)
        if m:
            # Prefer the second match when there is more than one;
            # otherwise fall back to the first.
            try:
                url = m[1]
            except IndexError:
                url = m[0]
            self.vnt.get(url, onfinished=partial(self.set_picture, picn, dt), out=picn)
            logger.debug('processing page')
        else:
            MainWindow.setWindowTitle('Comic strip not available for this date')

    def set_picture(self, *args):
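        # Runs once the strip image has been saved to picn (via out=);
        # display it and sync the window titles.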
        picn = args[0]
        dt = args[1]
        if os.path.isfile(picn):
            title = '{} {}'.format(self.name, dt)
            self.scrollArea.setWindowTitle(title)
            MainWindow.setWindowTitle(title)
            img = QtGui.QPixmap(picn, "1")
            self.label.setPixmap(img)
            if not self.scrollArea.isHidden():
                self.zoom_image(picn)
        else:
            MainWindow.setWindowTitle('Comic strip not available for this date')
        logger.debug('setting-picture')
    
    def addComics(self):
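        # Double-click toggles a subscription: a plain entry is appended
        # to config.txt (and to the btn1 combo box) and re-listed with a
        # '#' prefix; a '#'-prefixed entry is removed from both again.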
        comics_list = os.path.join(home_comics, 'config.txt')
        r = self.listComics.currentRow()
        item = self.listComics.item(r)
        if item:
            txt = item.text()
            if not txt.startswith('#'):
                if os.stat(comics_list).st_size == 0:
                    with open(comics_list, 'w') as f:
                        f.write(txt)
                    self.btn1.addItem(txt)
                    self.listComics.takeItem(r)
                    del item
                    self.listComics.insertItem(r, '#'+txt)
                    self.listComics.setCurrentRow(r)
                else:
                    lines = []
                    with open(comics_list, 'r') as f:
                        lines = f.readlines()
                    lines = [i.strip() for i in lines if i.strip()]
                    if txt not in lines:
                        with open(comics_list, 'a') as f:
                            f.write('\n'+txt)
                        self.btn1.addItem(txt)
                    self.listComics.takeItem(r)
                    del item
                    self.listComics.insertItem(r, '#'+txt)
                    self.listComics.setCurrentRow(r)
            else:
                lines = []
                txt = txt.replace('#', '')
                with open(comics_list, 'r') as f:
                    lines = f.readlines()
                lines = [i.strip() for i in lines if i.strip()]
                new_lines = [j for j in lines if j != txt]
                with open(comics_list, 'w') as f:
                    f.write('\n'.join(new_lines))
                self.listComics.takeItem(r)
                del item
                self.listComics.insertItem(r, txt)
                self.listComics.setCurrentRow(r)
                self.btn1.clear()
                original_list = ['Select', 'Calvin', 'Garfield', 'OneBigHappy']
                new_list = original_list + new_lines
                for i in new_list:
                    self.btn1.addItem(i)
                    
    def comics(self):
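        # Build the GoComics base URL for the selected comic, make sure
        # its cache directory exists, and load its most recent strip.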
        self.name = str(self.btn1.currentText())
        if self.name != "Select" and self.name:
            self.tabWidget.setCurrentIndex(0)
            self.home_comics = os.path.join(home_comics, self.name)
            if not os.path.exists(self.home_comics):
                os.makedirs(self.home_comics)
            if self.name == "Calvin":
                self.base_url = "http://www.gocomics.com/calvinandhobbes/"
            elif self.name == "Garfield":
                self.base_url = "http://www.gocomics.com/garfield/"
            elif self.name == "OneBigHappy":
                self.base_url = "http://www.gocomics.com/onebighappy/"
            else:
                self.base_url = "http://www.gocomics.com/"+self.name+'/'
            self.goto_page()
        
    def zoom_image(self, picn=None):
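        # Show the strip at its original size in the separate scroll
        # window, sized to fit within the available screen area.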
        global screen_width, screen_height
        logger.debug(picn)
        try:
            if not picn:
                picn = self.picn
            if os.path.isfile(picn):
                im = Image.open(picn)
                w, h = im.size
                img = QtGui.QPixmap(picn, "1")
                self.labelExp.setPixmap(img)
                QtWidgets.QApplication.processEvents()
                logger.debug('{} {} {} {}'.format(w, screen_width, h, screen_height))
                if w < screen_width:
                    wd = w+20
                else:
                    wd = screen_width
                if h < screen_height:
                    ht = h + 20
                else:
                    ht = screen_height - 60
                self.scrollArea.resize(wd, ht)
                self.scrollArea.show()
        except Exception as err:
            logger.error(err)
        
    def goto_direct(self):
        # No comic selected yet, so there is nothing to fetch.
        if not self.base_url:
            return
        today = self.date.date().toPyDate()
        td = str(today).replace('-', '/')
        logger.debug(td)
        self.fetch_comics(self.base_url, td)
        
    def goto_page(self):
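        # Fetch the comic's landing page; process_go_page then works out
        # which date to display.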
        self.vnt.get(self.base_url, onfinished=self.process_go_page)
          
    def process_go_page(self, *args):
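        # Find the date of the newest strip from the landing page (the
        # 'feature' heading links to it); if parsing fails, fall back to
        # the date currently shown in the date widget.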
        content = args[-1].html
        base_url = args[-2]
        logger.debug('{} {}'.format(args[-2], args[-3]))
        try:
            soup = BeautifulSoup(content, 'html.parser')
            link = soup.find('div', {'class':'feature'})
            link1 = link.find('h1')
            link2 = link1.find('a')['href']
            parts = link2.split('/')
            td = '/'.join(parts[-3:])
            logger.debug(td)
            self.cur_date = datetime.date(int(parts[-3]), int(parts[-2]), int(parts[-1]))
        except Exception as err:
            logger.error(err)
            today = self.date.date().toPyDate()
            td = str(today).replace('-', '/')
            logger.debug(td)
        self.fetch_comics(base_url, td)
        self.tab.setFocus() 
        logger.debug('process_go_page')
    
    def previous(self):
        if not self.base_url:
            return
        today = self.date.date().toPyDate()
        yday = today - datetime.timedelta(days=1)
        self.date.setDate(yday)
        td = str(yday).replace('-', '/')
        logger.debug(td)
        self.fetch_comics(self.base_url, td)
    
    def nxt(self):
        if not self.base_url:
            return
        today = self.date.date().toPyDate()
        tm = today + datetime.timedelta(days=1)
        # cur_date is the date of the newest available strip; it may
        # still be None before the first page has loaded.
        if self.cur_date and tm <= self.cur_date:
            self.date.setDate(tm)
            td = str(tm).replace('-', '/')
            logger.debug(td)
            self.fetch_comics(self.base_url, td)