Ejemplo n.º 1
0
 def download(self, url, fullpath, item):
     """Download *url* to *fullpath* and run ID3 tagging on the result.

     Returns True on success, False when the server answers with an
     HTTP error (the item is skipped).
     """
     try:
         df = Download(url, fullpath)
         df.download()
     except urllib.error.HTTPError:
         print("Http Error, skipping")
         return False
     self.check_id3_edit(item.podcast.id, fullpath, item)
     # Previously fell through with an implicit None while the error path
     # returned False; make the success path an explicit boolean.
     return True
Ejemplo n.º 2
0
 def download(self, url, fullpath, item):
     """Download *url* to *fullpath* and run ID3 tagging on the result.

     Returns True on success, False when the server answers with an
     HTTP error (the item is skipped).
     """
     try:
         df = Download(url, fullpath)
         df.download()
     except urllib.error.HTTPError:
         print("Http Error, skipping")
         return False
     self.check_id3_edit(item.podcast.id, fullpath, item)
     # Previously fell through with an implicit None while the error path
     # returned False; make the success path an explicit boolean.
     return True
Ejemplo n.º 3
0
 def downloadMp4ByURL(self, stdscr, format):
     """Prompt for a YouTube URL in a curses textbox and download it.

     ``format`` selects between audio ("MP3 DOWNLOAD") and video
     ("MP4 DOWNLOAD").
     """
     stdscr.addstr(0, 0, "Enter YouTube URL: (hit Ctrl-G to download)")
     editwin = curses.newwin(1,80, 2,1)
     rectangle(stdscr, 1,0, 1+1+1, 1+80+1)
     stdscr.refresh()
     box = Textbox(editwin)
     box.edit()
     # Textbox.gather() pads the collected text with trailing whitespace;
     # strip it so the URL is usable as-is.
     url = box.gather().strip()
     if format == "MP3 DOWNLOAD":
         Download.mp3Download(url, "test")
     elif format == "MP4 DOWNLOAD":
         Download.mp4Download(url, "test")
Ejemplo n.º 4
0
    def __init__(self, local_file='', url='', parent_queue=None, tokens_per_second=5):
        """Initialise the download and open an HTTP connection to the host."""
        # Delegate the shared bookkeeping to the base class first.
        Download.__init__(self, local_file, url, parent_queue, tokens_per_second)
        print (local_file)
        # Unbuffered binary handle so on-disk progress mirrors bytes received.
        self.file_handle = open(str(local_file), "wb", 0)

        self.size, self.so_far = 0, 0
        self.r = None
        self.url = urllib.parse.urlparse(url)
        print(self.url.netloc)
        self.http_conn = http.client.HTTPConnection(self.url.netloc)
Ejemplo n.º 5
0
def downloadAll(id):
    """Collect photo URLs from the metadata json files for *id* and
    download them into a temporary directory.

    Reads ``<metadatadir>/<id>/{geo,id,title}.json``, gathers every
    ``photo['photo']`` entry and hands the list to ``Download``.

    NOTE: the parameter shadows the builtin ``id``; kept for
    backward compatibility with keyword callers.
    """
    keys = ['geo', 'id', 'title']
    dlist = []
    for k in keys:
        fname = gconfig.metadatadir + '/%s/%s.json' % (id, k)
        # Context manager guarantees the handle is closed even when the
        # json is malformed (the original leaked it on a parse error).
        with open(fname) as fr:
            data = json.load(fr)
        for photo in data['photos']:
            dlist.append(photo['photo'])

    db = Download(gconfig.tmpdir + '/%s' % id)
    db.download(dlist)
Ejemplo n.º 6
0
 def download_list(self, url_list: list):
     """Download every URL in *url_list*, unpacking recognised archives.

     Failures are logged and do not stop the remaining downloads.
     """
     for url in url_list:
         t.log_message('Downloading: ' + url)
         try:
             job = Download(url, retries=5)
             job.download()
             path = os.path.abspath(job.download_path)
             extension = os.path.splitext(path)[1]
             known_formats = dict(shutil.get_archive_formats())
             if extension[1:] in known_formats and self.config.extract_archives:
                 shutil.unpack_archive(path)
         except Exception as e:
             t.log_message("ERROR. Download: " + url + ' FAILED.\n' + str(e))
Ejemplo n.º 7
0
 def test_download_file(self):
     """End-to-end check: download TEST_FILE from URL and verify it."""
     url = URL + TEST_FILE
     dl = Download(url)
     dl.start()

     # test it created the file
     self.assertTrue(os.path.isfile(TEST_FILE))

     # test it's the same file
     # NOTE(review): this compares the md5 digest against the file *name*
     # (TEST_FILE), which only passes if the fixture is named after its
     # own checksum -- confirm that is intentional.
     sum = self.md5sum(TEST_FILE)
     self.assertEqual(sum, TEST_FILE)

     # test metrics
     self.assertTrue(dl.kbps > 0)
Ejemplo n.º 8
0
def download(media_url, folder, audio=True, video=True, subtitles=False, **kwargs):
    """ Download the streams for the media url.

    :param str media_url: the url.
    :param str folder: the local folder for the output.
    :param bool audio: download audio.
    :param bool video: download video.
    :param subtitles: falsy to skip subtitles; otherwise it is also used
        as the language code passed to ``_subtitles``.
    :param kwargs: additional video properties (see `streams`).
    :return: a `Download` instance, or None when audio-only was requested
        but is not available.
    """
    streams_dict = streams(media_url, **kwargs)
    md = metadata(media_url)
    fn = utils.make_filename(md['title'])
    if not video:
        if not streams_dict[0]:
            print('Sorry, audio only is not available for this stream.')
            return None
        streams_dict[1] = None
    if not audio:
        streams_dict[0] = None
    filepath = os.path.join(folder, fn)
    if subtitles:
        subs = _subtitles(media_url, lang=subtitles)
        if subs:
            with open(f'{filepath}.srt', 'w') as f:
                f.write(subs)
    # ``filepath`` is already a str -- no need to re-wrap it in an f-string.
    d = Download(streams_dict, filepath, utils.merge_files)
    return d
Ejemplo n.º 9
0
 def is_downloaded(self, url, filename):
     """Return True when *filename* already holds the complete file at *url*.

     A missing local file, an unknown remote size, or a remote size
     larger than the local one means the download is still needed.
     An HTTP error is treated as "already downloaded" so the item is
     skipped rather than retried forever.
     """
     if not os.path.exists(filename):
         return False
     try:
         df = Download(url, filename)
         filesize = df.get_url_file_size()
     except urllib.error.HTTPError:
         # Typo fixed: message previously read "HTTTP".
         print("HTTP Error Skipping")
         return True
     if not filesize:
         return False
     elif int(filesize) > os.path.getsize(filename):
         print("filesize mismatch: %s %s" %
               (filesize, os.path.getsize(filename)))
         return False
     else:
         return True
Ejemplo n.º 10
0
def main():
    """Ask for a song name, look it up and start the download."""
    create_directory()

    songName = input("Enter the song name: ")
    link = Link(songName)
    details = link.song_list()

    # The instance was never used after construction, so drop the unused
    # binding (construction presumably performs the download -- confirm).
    Download(details[0], details[1])
Ejemplo n.º 11
0
 def is_downloaded(self, url, filename):
     """Return True when *filename* already holds the complete file at *url*.

     A missing local file, an unknown remote size, or a remote size
     larger than the local one means the download is still needed.
     An HTTP error is treated as "already downloaded" so the item is
     skipped rather than retried forever.
     """
     if not os.path.exists(filename):
         return False
     try:
         df = Download(url, filename)
         filesize = df.get_url_file_size()
     except urllib.error.HTTPError:
         # Typo fixed: message previously read "HTTTP".
         print("HTTP Error Skipping")
         return True
     if not filesize:
         return False
     elif int(filesize) > os.path.getsize(filename):
         print("filesize mismatch: %s %s" % (filesize,
                                             os.path.getsize(filename)))
         return False
     else:
         return True
Ejemplo n.º 12
0
    def apply(self) -> None:
        """Download the currently selected image and set it as wallpaper."""
        # Empty the "current wallpaper" directory first.
        for stale in CURRENT_DIR.iterdir():
            stale.unlink()
            logger.debug(f'Deleted {short_path(stale)}')

        info: Dict[str, str] = image_info(self.sw.current_image_id())
        target: Path = Path(info['image_id'] + info['extension'])

        self.progressbar.show()
        job = Download(target,
                       CURRENT_DIR,
                       info['full_image_url'],
                       stream=True)
        # Chunk progress drives the bar; the finished file becomes the wall.
        job.finished_chunk.connect(self.set_progressbar)
        job.finished_file.connect(set_wall)
        job.save()
Ejemplo n.º 13
0
    def save(self) -> None:
        """
        Save image to SAVED_DIR (docstring previously said CURRENT_DIR).
        When image is saved, show Saved label
        """
        image_id: str = self.sw.current_image_id()
        info: Dict[str, str] = image_info(image_id)
        image: Path = Path(info['image_id'] + info['extension'])

        # Download straight into the saved-images directory, feeding the
        # progress bar as chunks arrive.
        self.progressbar.show()
        download = Download(image,
                            SAVED_DIR,
                            info['full_image_url'],
                            stream=True)
        download.finished_chunk.connect(self.set_progressbar)
        download.save()

        # Show message "Saved" for 3 seconds in info layout
        self.info_layout.insertWidget(2, self.saved_msg)
        self.saved_msg.show()
        QTimer.singleShot(3000, self.hide_msg)

        save_msg: bool = config.getboolean('Program', 'show_save_message')

        def disable_save_msg():
            # "Don't show again" persists the choice to the config file.
            config['Program']['show_save_message'] = 'no'
            config_save()
            logger.debug('Save message is now disabled')

        # Create and show "save message box" if it is set to True
        if save_msg:
            msgBox = QMessageBox(self)
            msgBox.setIcon(QMessageBox.Information)
            msgBox.setText('Saved')
            msgBox.setInformativeText(
                f'The image has been saved to \n{str(SAVED_DIR)}')
            msgBox.setStandardButtons(QMessageBox.Ok)
            dontshow_btn = msgBox.addButton("Don't show again",
                                            QMessageBox.ActionRole)
            dontshow_btn.clicked.connect(disable_save_msg)
            msgBox.exec_()
Ejemplo n.º 14
0
def link_crawler(
        seed_url,
        link_regex=None,
        delay=5,
        max_depth=-1,
        max_urls=-1,
        user_agent='Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        proxies=None,
        num_retries=1,
        scrape_callback=None,
        cache=None):
    """Crawl from *seed_url*, following same-domain links matching *link_regex*.

    Depth-first (list.pop takes the most recently queued link). Stops
    after *max_urls* pages or beyond *max_depth*; obeys robots.txt via
    ``get_robots``.
    """
    crawl_queue = [seed_url]

    # seen maps url -> depth at which it was discovered.
    seen = {seed_url: 0}
    num_urls = 0
    rp = get_robots(seed_url)
    D = Download(delay=delay,
                 user_agent=user_agent,
                 proxies=proxies,
                 num_retries=num_retries,
                 cache=cache)

    while crawl_queue:
        url = crawl_queue.pop()
        depth = seen[url]

        if rp.can_fetch(user_agent, url):
            html = D(url)
            links = []
            # NOTE(review): in this regex each '?' quantifies the
            # *preceding* character (optional '/', optional 'n'), so it
            # matches e.g. 'next=' and '//logi' -- a literal '\?next='
            # was probably intended. Confirm before changing.
            if not re.search(r'(/?next=|//login?)', url):
                if scrape_callback:
                    links.extend(scrape_callback(url, html) or [])

                if depth != max_depth:
                    if link_regex:
                        links.extend(
                            urljoin(seed_url, link) for link in get_links(html)
                            if re.search(link_regex, link))

                    for link in links:
                        link = normalize(seed_url, link)

                        if link not in seen:
                            seen[link] = depth + 1

                            if same_domain(seed_url, link):
                                # Same login/next filter applied to outlinks.
                                if not re.search(r'(/?next=|//login?)', link):
                                    crawl_queue.append(link)
                num_urls += 1
                if num_urls == max_urls:
                    break
        else:
            print('Blocked by robots.txt', url)
Ejemplo n.º 15
0
 def _fetch_CDN_(self, resp):
     """Extract the best available CDN video URL from *resp* and download it.

     Returns True when the caller should drop the link (premium-only
     page, no format found, or a successful download), False when the
     download failed, and None when no player markup was present.
     """
     if 'alt="Upgrade to Pornhub Premium to enjoy this video."' in resp:
         # Upgrade-to-premium page: nothing to fetch, remove the link.
         return True
     if 'var player_quality_' in resp:
         # Probe qualities best-first: 720p, then 480p, then 240p.
         p720 = resp.find('var player_quality_720p = \'')
         if p720 == -1:
             p420 = resp.find('var player_quality_480p = \'')
             if p420 == -1:
                 p240 = resp.find('var player_quality_240p = \'')
                 if p240 == -1:
                     # Nothing is there.
                     print(
                         "\n[None] No Video Format could be found -- Removing the Link"
                     )
                     return True
                 else:
                     print("[FETCHED -- 240px]")
                     start = p240 + 27
                     end = resp.find('\'', p240 + 30)
             else:
                 print("[FETCHED -- 420px]")
                 start = p420 + 27
                 end = resp.find('\'', p420 + 30)
         else:
             print("[FETCHED -- 720px]")
             start = p720 + 27
             end = resp.find('\'', p720 + 30)
         # resp[start:end] is the quoted CDN URL.
         file_name = BeautifulSoup(resp, "html.parser")
         file_name = str(file_name.title.string)
         # BUG FIX: str.translate(None, chars) is the Python 2 API and
         # raises TypeError on Python 3; build a deletion table instead.
         file_name = file_name.translate(str.maketrans('', '', "'*:\"\/?<>|"))
         download = Download(resp[start:end], "%s.mp4" % (file_name))
         download = download.now()
         if download:
             return True
         return False
     else:
         pass
Ejemplo n.º 16
0
 def build(self):
     '''
     Build packages from queue

     For every queued package: download the SlackBuild script and its
     sources into ``build_path``, then run BuildPackage on them.
     '''
     packages = self.packages()
     if packages:
         # The build directory only needs to be created once, not per
         # package -- the existence check used to run inside the loop.
         if not os.path.exists(build_path):
             os.mkdir(build_path)
         for pkg in packages:
             sbo_url = sbo_search_pkg(pkg)
             sbo_dwn = SBoLink(sbo_url).tar_gz()
             source_dwn = SBoGrep(pkg).source().split()
             sources = []
             os.chdir(build_path)
             script = sbo_dwn.split("/")[-1]
             Download(build_path, sbo_dwn).start()
             for src in source_dwn:
                 Download(build_path, src).start()
                 sources.append(src.split("/")[-1])
             BuildPackage(script, sources, build_path).build()
     else:
         print("\nPackages not found in the queue for building\n")
Ejemplo n.º 17
0
    def showVideoDetails(self, stdscr, videoName, key):
        """Curses event loop for a single video's detail view.

        Keys: ENTER opens the video in a browser, LEFT returns to the
        channel view, RIGHT downloads MP4, DOWN downloads MP3.
        """
        self.second_loop = False
        self.main_loop = False
        self.key = key
        self.videoName = videoName
        self.printVideoDetails(stdscr, self.videoName, self.key)

        while self.showVideo:
            key = stdscr.getch()
            if key == curses.KEY_ENTER or key in [10, 13]:
                # [1] holds the video URL in the feed entry.
                webbrowser.open(self.rssFeeds[self.current_row][self.key][self.videoName][1])
            elif key == curses.KEY_LEFT:
                self.second_loop = True
                self.showChannel(stdscr)
            elif key == curses.KEY_RIGHT:
                Download.mp4Download(self.rssFeeds[self.current_row][self.key][self.videoName][1],
                self.videoName)
            elif key == curses.KEY_DOWN:
                Download.mp3Download(self.rssFeeds[self.current_row][self.key][self.videoName][1],
                self.videoName)
                stdscr.clear()
            # Redraw after every keypress.
            self.printVideoDetails(stdscr, self.videoName, self.key)
            stdscr.refresh()
Ejemplo n.º 18
0
    def update_(self) -> None:
        """Refresh results: drop cached thumbnails, empty the stacked
        widget, re-download the search json, then fetch new thumbnails."""
        for thumb in THUMBS_DIR.iterdir():
            logger.debug(f'Deleting {short_path(thumb)}')
            thumb.unlink()

        # Pop widgets from index 0 until the stack is empty.
        while self.sw.count():
            first = self.sw.widget(0)
            self.sw.removeWidget(first)
            del first

        job = Download(JSON_FILE,
                       APP_DIR,
                       SEARCH_URL,
                       payload=self.payload)
        job.save()

        self.progressbar.show()
        self.download_thumbs()
Ejemplo n.º 19
0
	def _fetch_CDN_(self, resp):
		"""Extract the best available CDN video URL from *resp* and download it.

		Returns True when the caller should drop the link (premium-only
		page, no format found, or a successful download), False when the
		download failed, and None when no player markup was present.
		"""
		if 'alt="Upgrade to Pornhub Premium to enjoy this video."' in resp:
			# Upgrade-to-premium page: nothing to fetch, remove the link.
			return True
		if 'var player_quality_' in resp:
			# Probe qualities best-first: 720p, then 480p, then 240p.
			p720 = resp.find('var player_quality_720p = \'')
			if p720 == -1:
				p420 = resp.find('var player_quality_480p = \'')
				if p420 == -1:
					p240 = resp.find('var player_quality_240p = \'')
					if p240 == -1:
						# Nothing is there.
						print("\n[None] No Video Format could be found -- Removing the Link")
						return True
					else:
						print("[FETCHED -- 240px]")
						start = p240 + 27
						end = resp.find('\'',p240+30)
				else:
					print("[FETCHED -- 420px]")
					start = p420 + 27
					end = resp.find('\'',p420+30)
			else:
				print("[FETCHED -- 720px]")
				start = p720 + 27
				end = resp.find('\'',p720+30)
			# resp[start:end] is the quoted CDN URL.
			file_name = BeautifulSoup(resp,"html.parser")
			file_name = str(file_name.title.string)
			# BUG FIX: str.translate(None, chars) is the Python 2 API and
			# raises TypeError on Python 3; build a deletion table instead.
			file_name = file_name.translate(str.maketrans('', '', "'*:\"\/?<>|"))
			download = Download(resp[start:end],"%s.mp4"%(file_name))
			download = download.now()
			if download:
				return True
			return False
		else:
			pass
Ejemplo n.º 20
0
def threaded_crawler(seed_url,
                     delay=5,
                     cache=None,
                     scrape_callback=None,
                     user_agent='wswp',
                     proxies=None,
                     num_retries=1,
                     max_threads=10,
                     timeout=60):
    """Crawl from *seed_url* using up to *max_threads* worker threads.

    URLs are coordinated through a MongoDB-backed queue so multiple
    workers (or processes) can share the frontier.
    """
    crawl_queue = MongoQueue()
    crawl_queue.clear()
    crawl_queue.push(seed_url)
    D = Download(cache=cache,
                 delay=delay,
                 user_agent=user_agent,
                 proxies=proxies,
                 num_retries=num_retries,
                 timeout=timeout)

    def process_queue():
        # Worker: pop urls until the queue raises IndexError (empty).
        while True:
            try:
                url = crawl_queue.pop()
            except IndexError:
                break
            else:
                html = D(url)
                if scrape_callback:
                    try:
                        links = scrape_callback(url, html) or []
                    except Exception as e:
                        print(f'Error in callback for:{url}:{e}')
                    else:
                        for link in links:
                            crawl_queue.push(normalize(seed_url, link))
                # Mark the url processed so it is not re-served.
                crawl_queue.complete(url)

    threads = []
    # NOTE(review): `or crawl_queue` relies on MongoQueue defining
    # truthiness (__bool__/__len__); a plain object is always truthy,
    # which would make this loop spin forever -- confirm.
    while threads or crawl_queue:
        for thread in threads:
            if not thread.is_alive():
                threads.remove(thread)

        while len(threads) < max_threads and crawl_queue.peek():
            thread = threading.Thread(target=process_queue)
            thread.start()
            threads.append(thread)

            # NOTE(review): sleeping inside this inner loop throttles
            # *thread creation*, not polling -- possibly intended to sit
            # one level out. Confirm before moving.
            time.sleep(SLEEP_TIME)
Ejemplo n.º 21
0
    def get_log_downloads(self):
        """Load saved downloads from ``log.json`` and resume them.

        Returns a dict mapping url -> Download for every logged,
        not-yet-completed download; auto-starts them when configured.
        A missing or unparseable log file yields an empty dict.
        """
        logfile = os.path.join(config_dir, 'log.json')
        current_downloads = {}
        if os.path.exists(logfile):
            with open(logfile, 'r', encoding='utf-8') as f:
                try:
                    log = json.load(f)
                # Catch only parse errors -- the old bare BaseException
                # also swallowed KeyboardInterrupt/SystemExit.
                # (json.JSONDecodeError is a ValueError subclass.)
                except ValueError:
                    pass
                else:
                    # Loop variable renamed: it previously shadowed the
                    # parsed document ``d``.
                    for entry in log.get('downloads', {}):
                        download = Download(dict=entry, config=self.config)
                        if not download.has_completed:
                            if self.config.auto_start:
                                download.start_download()
                            else:
                                download.get_tmppath()

                        current_downloads.update({download.url: download})

        return current_downloads
Ejemplo n.º 22
0
class AddDownloadDialog(wx.Dialog):
    """Modal dialog for adding a new download: URL, proxies, filename,
    remote-size preview, save path and auto-start options."""

    def __init__(self, parent=None, title='', config=None, *args, **kwargs):
        """Build the form, wire events and seed widgets from *config*."""
        super().__init__(parent, title=title, *args, **kwargs)

        # True once remote file info was fetched successfully and the
        # dialog may be confirmed.
        self.ok = False

        self.config = config
        self.download = Download(config=self.config)

        # --- URL row: label, text field, "fetch info" button ---
        url_label = wx.StaticText(self,
                                  label=_('链接地址'),
                                  style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                  size=(100, -1))
        self.url_text = wx.TextCtrl(self, size=(400, -1))
        self.retrieve_btn = wx.Button(self, label='→')
        url_sizer = wx.BoxSizer(wx.HORIZONTAL)
        url_sizer.Add(url_label, 0, wx.ALL, 5)
        url_sizer.Add(self.url_text, 0, wx.ALL, 5)
        url_sizer.Add(self.retrieve_btn, 0, wx.ALL, 5)

        # Disabled until the URL text looks like a valid url.
        self.retrieve_btn.Disable()

        # --- filename row ---
        file_label = wx.StaticText(self,
                                   label=_('文件名称'),
                                   style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                   size=(100, -1))
        self.filename_text = wx.TextCtrl(self, size=(400, -1))
        file_sizer = wx.BoxSizer(wx.HORIZONTAL)
        file_sizer.Add(file_label, 0, wx.ALL, 5)
        file_sizer.Add(self.filename_text, 0, wx.ALL, 5)

        # --- file size row (read-only, filled after fetching info) ---
        size_label = wx.StaticText(self,
                                   label=_('文件大小'),
                                   style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                   size=(100, -1))
        self.size_text = wx.TextCtrl(self,
                                     style=wx.TE_READONLY,
                                     size=(400, -1))
        size_sizer = wx.BoxSizer(wx.HORIZONTAL)
        size_sizer.Add(size_label, 0, wx.ALL, 5)
        size_sizer.Add(self.size_text, 0, wx.ALL, 5)

        # --- proxy row: separate http and https proxy fields ---
        proxy_label = wx.StaticText(self,
                                    label=_('代理地址'),
                                    style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                    size=(100, -1))
        self.proxy_http_text = wx.TextCtrl(self, size=(195, -1))
        self.proxy_https_text = wx.TextCtrl(self, size=(195, -1))
        proxy_sizer = wx.BoxSizer(wx.HORIZONTAL)
        proxy_sizer.Add(proxy_label, 0, wx.ALL, 5)
        proxy_sizer.Add(self.proxy_http_text, 0, wx.ALL, 5)
        proxy_sizer.Add(self.proxy_https_text, 0, wx.ALL, 5)

        self.Bind(wx.EVT_TEXT, self.on_url_change, self.url_text)
        self.Bind(wx.EVT_BUTTON, self.on_url, self.retrieve_btn)
        self.Bind(wx.EVT_TEXT_ENTER, self.on_url, self.url_text)

        # --- save path row with directory browser ---
        path_label = wx.StaticText(self,
                                   label=_('保存路径'),
                                   style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                   size=(100, -1))
        self.path_text = wx.TextCtrl(self,
                                     size=(400, -1),
                                     style=wx.TE_READONLY)
        self.browse_btn = wx.Button(self, label=_('浏览'))

        path_sizer = wx.BoxSizer(wx.HORIZONTAL)
        path_sizer.Add(path_label, 0, wx.ALL, 5)
        path_sizer.Add(self.path_text, 0, wx.ALL, 5)
        path_sizer.Add(self.browse_btn, 0, wx.ALL, 5)

        # --- checkbox row (dummy label keeps column alignment) ---
        dummy_label = wx.StaticText(self,
                                    label='',
                                    style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                    size=(100, -1))
        self.auto_start_check = wx.CheckBox(self, label=_('自动开始下载'))
        self.auto_change_default_save_to_check = wx.CheckBox(
            self, label=_('自动改变默认保存路径'))

        check_sizer = wx.BoxSizer(wx.HORIZONTAL)
        check_sizer.Add(dummy_label, 0, wx.ALL, 5)
        check_sizer.Add(self.auto_start_check, 0, wx.ALL, 5)
        check_sizer.Add(self.auto_change_default_save_to_check, 0, wx.ALL, 5)

        self.Bind(wx.EVT_BUTTON, self.on_browse, self.browse_btn)

        btn_sizer = self.CreateSeparatedButtonSizer(wx.OK | wx.CANCEL)

        main_sizer = wx.BoxSizer(wx.VERTICAL)
        main_sizer.Add(url_sizer, 0, wx.ALL | wx.EXPAND, 5)
        main_sizer.Add(proxy_sizer, 0, wx.ALL | wx.EXPAND, 5)
        main_sizer.Add(file_sizer, 0, wx.ALL | wx.EXPAND, 5)
        main_sizer.Add(size_sizer, 0, wx.ALL | wx.EXPAND, 5)
        main_sizer.Add(path_sizer, 0, wx.ALL | wx.EXPAND, 5)
        main_sizer.Add(check_sizer, 0, wx.ALL | wx.EXPAND, 5)

        main_sizer.Add(btn_sizer, 0, wx.ALL | wx.EXPAND, 5)

        self.SetSizer(main_sizer)

        main_sizer.Fit(self)

        self.ok_btn = self.FindWindowById(wx.ID_OK)
        self.cancel_btn = self.FindWindowById(wx.ID_CANCEL)

        self.ok_btn.SetLabel(_('确定'))
        self.cancel_btn.SetLabel(_('取消'))

        # init control from config
        self.auto_start_check.SetValue(self.config.auto_start)
        self.auto_change_default_save_to_check.SetValue(
            self.config.auto_change_default_save_to)
        self.path_text.SetValue(self.config.default_save_to or os.getcwd())
        self.proxy_http_text.SetValue(self.config.last_used_http_proxy or '')
        self.proxy_https_text.SetValue(self.config.last_used_https_proxy or '')

        self.Bind(wx.EVT_BUTTON, self.on_ok, self.ok_btn)

        self.set_buttons()

    def set_buttons(self, enable=True):
        """Enable/disable the form's buttons; pass ``enable=False`` to
        lock the form while remote info is being fetched."""
        if enable:
            self.cancel_btn.Enable()
            self.browse_btn.Enable()
            self.filename_text.Enable()
        else:
            self.cancel_btn.Disable()
            self.browse_btn.Disable()
            self.filename_text.Disable()

        # OK is only offered after a successful info fetch.
        if enable and self.ok:
            self.ok_btn.Enable()
        else:
            self.ok_btn.Disable()

        if enable and helper.is_url(self.url_text.GetValue().strip()):
            self.retrieve_btn.Enable()
        else:
            self.retrieve_btn.Disable()

    def on_ok(self, event):
        """Validate input, copy choices into the download and the config,
        persist the config, and let the dialog close via event.Skip()."""
        filename = helper.sanitize(self.filename_text.GetValue())
        if not filename:
            wx.MessageDialog(self,
                             _('请输入有效的文件名'),
                             caption=_('恒睿下载'),
                             style=wx.OK | wx.CENTRE).ShowModal()
            return False
        if not self.ok:
            wx.MessageDialog(self,
                             _('请输入有效的下载地址'),
                             caption=_('恒睿下载'),
                             style=wx.OK | wx.CENTRE).ShowModal()
            return False

        self.download.http_proxy = self.proxy_http_text.GetValue()
        self.download.https_proxy = self.proxy_https_text.GetValue()
        self.download.save_to = self.path_text.GetValue() or os.getcwd()
        self.download.filename = filename

        # Remember the proxies / options for the next session.
        if self.download.http_proxy:
            self.config.last_used_http_proxy = self.download.http_proxy
        if self.download.https_proxy:
            self.config.last_used_https_proxy = self.download.https_proxy
        self.config.auto_change_default_save_to = self.auto_change_default_save_to_check.GetValue(
        )
        self.config.auto_start = self.auto_start_check.GetValue()
        if self.config.auto_change_default_save_to:
            self.config.default_save_to = self.download.save_to

        self.config.write_config()

        event.Skip()

        return True

    def on_url_change(self, event):
        """Invalidate the fetched state and re-evaluate the fetch button
        whenever the URL text changes."""
        self.ok = False
        if helper.is_url(self.url_text.GetValue().strip()):
            self.retrieve_btn.Enable()
        else:
            self.retrieve_btn.Disable()

    def on_url(self, event):
        """Fetch remote file info for the entered URL and fill the
        filename/size fields; sets ``self.ok`` on success."""
        self.filename_text.SetValue(_('正在获取信息...'))
        self.set_buttons(False)

        self.download.http_proxy = self.proxy_http_text.GetValue()
        self.download.https_proxy = self.proxy_https_text.GetValue()
        self.download.fetch_file_info(self.url_text.GetValue())
        filename = self.download.filename
        if filename:
            strsize = helper.strsize(self.download.size)
            self.filename_text.SetValue(filename)
            self.size_text.SetValue(strsize)
            self.ok = True
        else:
            # No filename means the fetch failed; show the status instead.
            self.filename_text.SetValue(self.download.status)
            self.size_text.SetValue('')
            self.ok = False

        self.set_buttons()

    def on_browse(self, event):
        """Let the user pick the save directory."""
        defaultDir = self.path_text.GetValue() or os.getcwd()
        dlg = wx.DirDialog(self, _("选择文件夹"), defaultDir)

        if dlg.ShowModal() == wx.ID_OK:
            self.path_text.SetValue(dlg.GetPath())

    def clear(self):
        """Reset all fields and start over with a fresh Download."""
        self.url_text.SetValue('')
        self.filename_text.SetValue('')
        self.size_text.SetValue('')
        self.download = Download(config=self.config)
Ejemplo n.º 23
0
 def clear(self):
     """Reset the dialog's text fields and start from a fresh Download."""
     for field in (self.url_text, self.filename_text, self.size_text):
         field.SetValue('')
     self.download = Download(config=self.config)
Ejemplo n.º 24
0
 def download_config(self, config_url: str, file_name: str = None):
     """Fetch the configuration file at *config_url*, optionally saving
     it under *file_name*."""
     Download(config_url, file_name).download()
Ejemplo n.º 25
0
# _*_ coding: utf-8 _*_
__author__ = 'LelandYan'
__date__ = '2018/9/23 8:51'

import lxml.html
from downloader import Download
import json
import string
import csv

FIELDS = ('area', 'population', 'iso', 'country', 'capital', 'continent',
          'tld', 'currency_name', 'phone', 'postal_code_format',
          'postal_code_regex', 'languages', 'neighbours')

# Fetch all countries from a single large ajax page and write one country
# name per row. The output file is managed by a context manager so the
# handle is flushed and closed even on errors -- the original leaked it.
D = Download()
html = D(
    'http://example.webscraping.com/places/ajax/search.json?&search_term=.&page_size=1000&page=0'
)
ajax = json.loads(html)
with open('countries2.txt', 'w', newline="") as out_file:
    writer = csv.writer(out_file)
    writer.writerow(('country', ))
    for record in ajax['records']:
        writer.writerow((record['country'], ))
# url = 'http://example.webscraping.com/places/ajax/search.json?&search_term={}&page_size=20&page={}'
# countries = set()
# for letter in string.ascii_lowercase:
#     page = 0
#     while True:
#         html = D(url.format(letter, page))
#         try:
#             ajax = json.loads(html)
Ejemplo n.º 26
0
    def _fetch_CDN_(self, resp):
        """Parse the flashvars json out of a video page, pick the best
        quality allowed by ``config.json`` and download it.

        Returns True when the caller should drop the link (premium-only
        page, no video found, or a successful download), False when the
        download failed.
        """
        resp = str(resp)
        if str(resp).find(
                "alt=\"Upgrade to Pornhub Premium to enjoy this video.\""
        ) != -1:
            # There is nothing to fetch when "Upgrade to Pornhub Premium" appears.
            return True
        import re
        regex = r"(var flashvars_)(.*?)(=)(.*?)(};)"
        match = re.findall(regex, resp)[0][3]
        # Renamed from ``json`` -- the old local shadowed the json module.
        payload = f"{match.strip()}{'}'}"
        # Strip fragments that would break decoding (embed iframe, sharebar
        # prefix, trailing country-block blob).
        payload = re.sub('("embedCode":"<iframe src=)(.*?)(iframe>",)', '',
                         payload)
        payload = re.sub('({"disable_sharebar")(.*?)("mediaDefinitions":)', '',
                         payload)
        payload = re.sub('(,"video_unavailable_country")(.*)("})', '', payload)
        media = jsonpickle.decode(
            payload.replace("\\\\\"",
                            "\"").replace("\\\\/",
                                          "//").replace("\/\"",
                                                        "\"").replace("//", "/"))
        definition, link = "", ""
        l_1080, l_720, l_480, l_280 = None, None, None, None
        for d in media:
            q = int(d["quality"])
            if q == 1080:
                l_1080 = d["videoUrl"]
            if q == 720:
                l_720 = d["videoUrl"]
            if q == 480:
                l_480 = d["videoUrl"]
            if q == 280:
                l_280 = d["videoUrl"]

        # Default: every quality allowed; overridden by ./config.json.
        config = {
            "download": {
                "1080": True,
                "720": True,
                "480": True,
                "280": True
            }
        }
        if not os.path.exists("./config.json"):
            with open("./config.json", "w") as cfg:
                cfg.write(
                    '{"download":{"1080":true, "720":true, "480":true, "280":true}}'
                )
        else:
            with open("./config.json", "r+") as cfg:
                config = jsonpickle.decode(cfg.read())

        # Pick the best allowed quality, highest first.
        if l_1080 is not None and config["download"]["1080"]:
            link = l_1080
            definition = "_1080P"
            print("Found video in 1080P")
        elif l_720 is not None and config["download"]["720"]:
            link = l_720
            definition = "_720P"
            print("Found video in 720")
        elif l_480 is not None and config["download"]["480"]:
            link = l_480
            definition = "_480P"
            print("Found video in 480")
        elif l_280 is not None and config["download"]["280"]:
            link = l_280
            definition = "_280P"
            print("Found video in 280")
        else:
            print("No video found")
            return True
        # Build a filesystem-safe name from the page title.
        file_name = BeautifulSoup(resp, "html.parser")
        file_name = str(file_name.title.string)
        for bc in "'*:\"\/?<>|":
            file_name = file_name.replace(bc, " ")
        file_name = file_name.replace(" - Pornhub.com", "")
        download = Download(link, f"{file_name}{definition}.mp4")
        download = download.now()
        if download:
            return True
        return False
Ejemplo n.º 27
0
    def __init__(self, parent=None, title='', config=None, *args, **kwargs):
        """Build the add-download form, wire events and seed widgets
        from *config*."""
        super().__init__(parent, title=title, *args, **kwargs)

        # True once remote file info was fetched successfully and the
        # dialog may be confirmed.
        self.ok = False

        self.config = config
        self.download = Download(config=self.config)

        # --- URL row: label, text field, "fetch info" button ---
        url_label = wx.StaticText(self,
                                  label=_('链接地址'),
                                  style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                  size=(100, -1))
        self.url_text = wx.TextCtrl(self, size=(400, -1))
        self.retrieve_btn = wx.Button(self, label='→')
        url_sizer = wx.BoxSizer(wx.HORIZONTAL)
        url_sizer.Add(url_label, 0, wx.ALL, 5)
        url_sizer.Add(self.url_text, 0, wx.ALL, 5)
        url_sizer.Add(self.retrieve_btn, 0, wx.ALL, 5)

        # Disabled until the URL text looks like a valid url.
        self.retrieve_btn.Disable()

        # --- filename row ---
        file_label = wx.StaticText(self,
                                   label=_('文件名称'),
                                   style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                   size=(100, -1))
        self.filename_text = wx.TextCtrl(self, size=(400, -1))
        file_sizer = wx.BoxSizer(wx.HORIZONTAL)
        file_sizer.Add(file_label, 0, wx.ALL, 5)
        file_sizer.Add(self.filename_text, 0, wx.ALL, 5)

        # --- file size row (read-only, filled after fetching info) ---
        size_label = wx.StaticText(self,
                                   label=_('文件大小'),
                                   style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                   size=(100, -1))
        self.size_text = wx.TextCtrl(self,
                                     style=wx.TE_READONLY,
                                     size=(400, -1))
        size_sizer = wx.BoxSizer(wx.HORIZONTAL)
        size_sizer.Add(size_label, 0, wx.ALL, 5)
        size_sizer.Add(self.size_text, 0, wx.ALL, 5)

        # --- proxy row: separate http and https proxy fields ---
        proxy_label = wx.StaticText(self,
                                    label=_('代理地址'),
                                    style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                    size=(100, -1))
        self.proxy_http_text = wx.TextCtrl(self, size=(195, -1))
        self.proxy_https_text = wx.TextCtrl(self, size=(195, -1))
        proxy_sizer = wx.BoxSizer(wx.HORIZONTAL)
        proxy_sizer.Add(proxy_label, 0, wx.ALL, 5)
        proxy_sizer.Add(self.proxy_http_text, 0, wx.ALL, 5)
        proxy_sizer.Add(self.proxy_https_text, 0, wx.ALL, 5)

        self.Bind(wx.EVT_TEXT, self.on_url_change, self.url_text)
        self.Bind(wx.EVT_BUTTON, self.on_url, self.retrieve_btn)
        self.Bind(wx.EVT_TEXT_ENTER, self.on_url, self.url_text)

        # --- save path row with directory browser ---
        path_label = wx.StaticText(self,
                                   label=_('保存路径'),
                                   style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                   size=(100, -1))
        self.path_text = wx.TextCtrl(self,
                                     size=(400, -1),
                                     style=wx.TE_READONLY)
        self.browse_btn = wx.Button(self, label=_('浏览'))

        path_sizer = wx.BoxSizer(wx.HORIZONTAL)
        path_sizer.Add(path_label, 0, wx.ALL, 5)
        path_sizer.Add(self.path_text, 0, wx.ALL, 5)
        path_sizer.Add(self.browse_btn, 0, wx.ALL, 5)

        # --- checkbox row (dummy label keeps column alignment) ---
        dummy_label = wx.StaticText(self,
                                    label='',
                                    style=wx.ALIGN_RIGHT | wx.ST_NO_AUTORESIZE,
                                    size=(100, -1))
        self.auto_start_check = wx.CheckBox(self, label=_('自动开始下载'))
        self.auto_change_default_save_to_check = wx.CheckBox(
            self, label=_('自动改变默认保存路径'))

        check_sizer = wx.BoxSizer(wx.HORIZONTAL)
        check_sizer.Add(dummy_label, 0, wx.ALL, 5)
        check_sizer.Add(self.auto_start_check, 0, wx.ALL, 5)
        check_sizer.Add(self.auto_change_default_save_to_check, 0, wx.ALL, 5)

        self.Bind(wx.EVT_BUTTON, self.on_browse, self.browse_btn)

        btn_sizer = self.CreateSeparatedButtonSizer(wx.OK | wx.CANCEL)

        main_sizer = wx.BoxSizer(wx.VERTICAL)
        main_sizer.Add(url_sizer, 0, wx.ALL | wx.EXPAND, 5)
        main_sizer.Add(proxy_sizer, 0, wx.ALL | wx.EXPAND, 5)
        main_sizer.Add(file_sizer, 0, wx.ALL | wx.EXPAND, 5)
        main_sizer.Add(size_sizer, 0, wx.ALL | wx.EXPAND, 5)
        main_sizer.Add(path_sizer, 0, wx.ALL | wx.EXPAND, 5)
        main_sizer.Add(check_sizer, 0, wx.ALL | wx.EXPAND, 5)

        main_sizer.Add(btn_sizer, 0, wx.ALL | wx.EXPAND, 5)

        self.SetSizer(main_sizer)

        main_sizer.Fit(self)

        self.ok_btn = self.FindWindowById(wx.ID_OK)
        self.cancel_btn = self.FindWindowById(wx.ID_CANCEL)

        self.ok_btn.SetLabel(_('确定'))
        self.cancel_btn.SetLabel(_('取消'))

        # init control from config
        self.auto_start_check.SetValue(self.config.auto_start)
        self.auto_change_default_save_to_check.SetValue(
            self.config.auto_change_default_save_to)
        self.path_text.SetValue(self.config.default_save_to or os.getcwd())
        self.proxy_http_text.SetValue(self.config.last_used_http_proxy or '')
        self.proxy_https_text.SetValue(self.config.last_used_https_proxy or '')

        self.Bind(wx.EVT_BUTTON, self.on_ok, self.ok_btn)

        self.set_buttons()